morton_swizzle: Implement texture formats in UNSWIZZLE_TABLE
* I can now remove that loop that has been messing with my OCD
This commit is contained in:
		| @@ -106,11 +106,11 @@ public: | ||||
|         return cptr; | ||||
|     } | ||||
|  | ||||
|     std::span<std::byte> GetBytes(std::size_t size) { | ||||
|     auto GetWriteBytes(std::size_t size) { | ||||
|         return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size}; | ||||
|     } | ||||
|  | ||||
|     std::span<const std::byte> GetBytes(std::size_t size) const { | ||||
|     auto GetReadBytes(std::size_t size) const { | ||||
|         return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size}; | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -11,7 +11,7 @@ namespace VideoCore { | ||||
|  | ||||
| void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { | ||||
|     const u32 page_start = addr >> Memory::CITRA_PAGE_BITS; | ||||
|     const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS); | ||||
|     const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1; | ||||
|  | ||||
|     u32 uncache_start_addr = 0; | ||||
|     u32 cache_start_addr = 0; | ||||
|   | ||||
| @@ -27,16 +27,14 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa | ||||
| void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||||
|     DEBUG_ASSERT(load_start >= addr && load_end <= end); | ||||
|  | ||||
|     // We start reading from addr instead of load_start, otherwise LookupTexture | ||||
|     // in UnswizzleTexture will not sample texels correctly | ||||
|     auto source_ptr = VideoCore::g_memory->GetPhysicalRef(addr); | ||||
|     auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); | ||||
|     if (!source_ptr) [[unlikely]] { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     const auto start_offset = load_start - addr; | ||||
|     const auto texture_data = source_ptr.GetBytes(load_end - addr); | ||||
|     const auto upload_size = texture_data.size() - start_offset; | ||||
|     const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); | ||||
|     const auto upload_size = static_cast<u32>(upload_data.size()); | ||||
|  | ||||
|     if (gl_buffer.empty()) { | ||||
|         gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); | ||||
| @@ -47,8 +45,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||||
|     if (!is_tiled) { | ||||
|         ASSERT(type == SurfaceType::Color); | ||||
|  | ||||
|         auto upload_data = texture_data.subspan(start_offset, upload_size); | ||||
|         auto dest_buffer = std::span{gl_buffer}.subspan(start_offset, upload_size); | ||||
|         const auto dest_buffer = std::span{gl_buffer.begin() + start_offset, upload_size}; | ||||
|         if (pixel_format == PixelFormat::RGBA8 && GLES) { | ||||
|             Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); | ||||
|         } else if (pixel_format == PixelFormat::RGB8 && GLES) { | ||||
| @@ -57,7 +54,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||||
|             std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); | ||||
|         } | ||||
|     } else { | ||||
|         UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer); | ||||
|         UnswizzleTexture(*this, start_offset, upload_data, gl_buffer); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -66,15 +63,14 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Fl | ||||
| void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||||
|     DEBUG_ASSERT(flush_start >= addr && flush_end <= end); | ||||
|  | ||||
|     auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr); | ||||
|     auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); | ||||
|     if (!dest_ptr) [[unlikely]] { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     const auto start_offset = flush_start - addr; | ||||
|     const auto end_offset = flush_end - addr; | ||||
|     const auto texture_data = dest_ptr.GetBytes(flush_end - addr); | ||||
|     const auto download_size = texture_data.size() - start_offset; | ||||
|     const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); | ||||
|     const auto download_size = static_cast<u32>(download_dest.size()); | ||||
|  | ||||
|     MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); | ||||
|  | ||||
| @@ -86,9 +82,9 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||||
|             std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); | ||||
|         } | ||||
|  | ||||
|         for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { | ||||
|         for (u32 offset = coarse_start_offset; offset < download_size; offset += fill_size) { | ||||
|             std::memcpy(&dest_ptr[offset], &fill_data[0], | ||||
|                         std::min(fill_size, end_offset - offset)); | ||||
|                         std::min(fill_size, download_size - offset)); | ||||
|         } | ||||
|  | ||||
|         if (backup_bytes) | ||||
| @@ -96,17 +92,16 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||||
|     } else if (!is_tiled) { | ||||
|         ASSERT(type == SurfaceType::Color); | ||||
|  | ||||
|         auto dest_buffer = texture_data.subspan(start_offset, download_size); | ||||
|         auto download_data = std::span{gl_buffer}.subspan(start_offset, download_size); | ||||
|         const auto download_data = std::span{gl_buffer.begin() + start_offset, download_size}; | ||||
|         if (pixel_format == PixelFormat::RGBA8 && GLES) { | ||||
|             Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data); | ||||
|         } else if (pixel_format == PixelFormat::RGB8 && GLES) { | ||||
|             Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data); | ||||
|         } else { | ||||
|             std::memcpy(dest_buffer.data(), download_data.data(), download_size); | ||||
|             std::memcpy(download_dest.data(), download_data.data(), download_size); | ||||
|         } | ||||
|     } else { | ||||
|         SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data); | ||||
|         SwizzleTexture(*this, start_offset, gl_buffer, download_dest); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -5,92 +5,179 @@ | ||||
| #pragma once | ||||
| #include <span> | ||||
| #include <bit> | ||||
| #include <algorithm> | ||||
| #include "common/alignment.h" | ||||
| #include "core/memory.h" | ||||
| #include "common/color.h" | ||||
| #include "video_core/rasterizer_cache/pixel_format.h" | ||||
| #include "video_core/renderer_opengl/gl_vars.h" | ||||
| #include "video_core/texture/etc1.h" | ||||
| #include "video_core/utils.h" | ||||
| #include "video_core/video_core.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| inline u32 MakeInt(std::span<std::byte> bytes) { | ||||
|     u32 integer{}; | ||||
|     std::memcpy(&integer, bytes.data(), sizeof(u32)); | ||||
| template <typename T> | ||||
| inline T MakeInt(const std::byte* bytes) { | ||||
|     T integer{}; | ||||
|     std::memcpy(&integer, bytes, sizeof(T)); | ||||
|  | ||||
|     return integer; | ||||
| } | ||||
|  | ||||
| template <PixelFormat format> | ||||
| inline void DecodePixel(const std::byte* source, std::byte* dest) { | ||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||
|  | ||||
|     if constexpr (format == PixelFormat::D24S8) { | ||||
|         const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8); | ||||
|         std::memcpy(dest, &d24s8, sizeof(u32)); | ||||
|     } else if constexpr (format == PixelFormat::IA8) { | ||||
|         std::memset(dest, static_cast<int>(source[1]), 3); | ||||
|         dest[3] = source[0]; | ||||
|     } else if constexpr (format == PixelFormat::RG8) { | ||||
|         const auto rgba = Color::DecodeRG8(reinterpret_cast<const u8*>(source)); | ||||
|         std::memcpy(dest, rgba.AsArray(), 4); | ||||
|     } else if constexpr (format == PixelFormat::I8) { | ||||
|         std::memset(dest, static_cast<int>(source[0]), 3); | ||||
|         dest[3] = std::byte{255}; | ||||
|     } else if constexpr (format == PixelFormat::A8) { | ||||
|         std::memset(dest, 0, 3); | ||||
|         dest[3] = source[0]; | ||||
|     } else if constexpr (format == PixelFormat::IA4) { | ||||
|         const u8 ia4 = static_cast<const u8>(source[0]); | ||||
|         std::memset(dest, Color::Convert4To8(ia4 >> 4), 3); | ||||
|         dest[3] = std::byte{Color::Convert4To8(ia4 & 0xF)}; | ||||
|     } else if (format == PixelFormat::RGBA8 && GLES) { | ||||
|         const u32 abgr = MakeInt<u32>(source); | ||||
|         const u32 rgba = std::byteswap(abgr); | ||||
|         std::memcpy(dest, &rgba, sizeof(u32)); | ||||
|     } else if (format == PixelFormat::RGB8 && GLES) { | ||||
|         dest[0] = source[2]; | ||||
|         dest[1] = source[1]; | ||||
|         dest[2] = source[0]; | ||||
|     } else { | ||||
|         std::memcpy(dest, source, bytes_per_pixel); | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <PixelFormat format> | ||||
| inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) { | ||||
|     const u32 morton_offset = VideoCore::MortonInterleave(x, y); | ||||
|     const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]); | ||||
|     const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF)); | ||||
|  | ||||
|     if constexpr (format == PixelFormat::I4) { | ||||
|         std::memset(dest_pixel, static_cast<int>(pixel), 3); | ||||
|         dest_pixel[3] = std::byte{255}; | ||||
|     } else { | ||||
|         std::memset(dest_pixel, 0, 3); | ||||
|         dest_pixel[3] = std::byte{pixel}; | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <PixelFormat format> | ||||
| inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) { | ||||
|     constexpr u32 subtile_width = 4; | ||||
|     constexpr u32 subtile_height = 4; | ||||
|     constexpr bool has_alpha = format == PixelFormat::ETC1A4; | ||||
|     constexpr std::size_t subtile_size = has_alpha ? 16 : 8; | ||||
|  | ||||
|     const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height); | ||||
|     x %= subtile_width; | ||||
|     y %= subtile_height; | ||||
|  | ||||
|     const std::byte* subtile_ptr = source_tile + subtile_index * subtile_size; | ||||
|  | ||||
|     u8 alpha = 255; | ||||
|     if constexpr (has_alpha) { | ||||
|         u64_le packed_alpha; | ||||
|         std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64)); | ||||
|         subtile_ptr += sizeof(u64); | ||||
|  | ||||
|         alpha = Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF); | ||||
|     } | ||||
|  | ||||
|     const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr); | ||||
|     const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y); | ||||
|  | ||||
|     // Copy the uncompressed pixel to the destination | ||||
|     std::memcpy(dest_pixel, rgb.AsArray(), 3); | ||||
|     dest_pixel[3] = std::byte{alpha}; | ||||
| } | ||||
|  | ||||
| template <PixelFormat format> | ||||
| inline void EncodePixel(const std::byte* source, std::byte* dest) { | ||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||
|  | ||||
|     if constexpr (format == PixelFormat::D24S8) { | ||||
|         const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8); | ||||
|         std::memcpy(dest, &s8d24, sizeof(u32)); | ||||
|     } else if (format == PixelFormat::RGBA8 && GLES) { | ||||
|         const u32 abgr = std::byteswap(MakeInt<u32>(source)); | ||||
|         std::memcpy(dest, &abgr, sizeof(u32)); | ||||
|     } else if (format == PixelFormat::RGB8 && GLES) { | ||||
|         dest[0] = source[2]; | ||||
|         dest[1] = source[1]; | ||||
|         dest[2] = source[0]; | ||||
|     } else { | ||||
|         std::memcpy(dest, source, bytes_per_pixel); | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <bool morton_to_linear, PixelFormat format> | ||||
| inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::span<std::byte> linear_buffer) { | ||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||
|     constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format); | ||||
|     constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4; | ||||
|     constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4; | ||||
|  | ||||
|     for (u32 y = 0; y < 8; y++) { | ||||
|         for (u32 x = 0; x < 8; x++) { | ||||
|             const u32 tile_offset = VideoCore::MortonInterleave(x, y) * bytes_per_pixel; | ||||
|             const u32 linear_offset = ((7 - y) * stride + x) * linear_bytes_per_pixel; | ||||
|             auto tile_pixel = tile_buffer.subspan(tile_offset, bytes_per_pixel); | ||||
|             auto linear_pixel = linear_buffer.subspan(linear_offset, linear_bytes_per_pixel); | ||||
|  | ||||
|             const auto tiled_pixel = tile_buffer.subspan(VideoCore::MortonInterleave(x, y) * bytes_per_pixel, | ||||
|                                                          bytes_per_pixel); | ||||
|             const auto linear_pixel = linear_buffer.subspan(((7 - y) * stride + x) * linear_bytes_per_pixel, | ||||
|                                                             linear_bytes_per_pixel); | ||||
|             if constexpr (morton_to_linear) { | ||||
|                 if constexpr (format == PixelFormat::D24S8) { | ||||
|                     const u32 s8d24 = MakeInt(tile_pixel); | ||||
|                     const u32 d24s8 = std::rotl(s8d24, 8); | ||||
|                     std::memcpy(linear_pixel.data(), &d24s8, sizeof(u32)); | ||||
|                 } else if (format == PixelFormat::RGBA8 && GLES) { | ||||
|                     const u32 abgr = MakeInt(tile_pixel); | ||||
|                     const u32 rgba = std::byteswap(abgr); | ||||
|                     std::memcpy(linear_pixel.data(), &rgba, sizeof(u32)); | ||||
|                 } else if (format == PixelFormat::RGB8 && GLES) { | ||||
|                     std::memcpy(linear_pixel.data(), tile_pixel.data(), 3); | ||||
|                     std::swap(linear_pixel[0], linear_pixel[2]); | ||||
|                 if constexpr (is_compressed) { | ||||
|                     DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data()); | ||||
|                 } else if constexpr (is_4bit) { | ||||
|                     DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data()); | ||||
|                 } else { | ||||
|                     std::memcpy(linear_pixel.data(), tile_pixel.data(), bytes_per_pixel); | ||||
|                     DecodePixel<format>(tiled_pixel.data(), linear_pixel.data()); | ||||
|                 } | ||||
|             } else { | ||||
|                 if constexpr (format == PixelFormat::D24S8) { | ||||
|                     const u32 d24s8 = MakeInt(linear_pixel); | ||||
|                     const u32 s8d24 = std::rotr(d24s8, 8); | ||||
|                     std::memcpy(tile_pixel.data(), &s8d24, sizeof(u32)); | ||||
|                 } else if (format == PixelFormat::RGBA8 && GLES) { | ||||
|                     const u32 rgba = MakeInt(linear_pixel); | ||||
|                     const u32 abgr = std::byteswap(rgba); | ||||
|                     std::memcpy(tile_pixel.data(), &abgr, sizeof(u32)); | ||||
|                 } else if (format == PixelFormat::RGB8 && GLES) { | ||||
|                     std::memcpy(tile_pixel.data(), linear_pixel.data(), 3); | ||||
|                     std::swap(tile_pixel[0], tile_pixel[2]); | ||||
|                 } else { | ||||
|                     std::memcpy(tile_pixel.data(), linear_pixel.data(), bytes_per_pixel); | ||||
|                 } | ||||
|                 EncodePixel<format>(linear_pixel.data(), tiled_pixel.data()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <bool morton_to_linear, PixelFormat format> | ||||
| static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer, | ||||
|                        PAddr base, PAddr start, PAddr end) { | ||||
| static void MortonCopy(u32 stride, u32 height, u32 start_offset, | ||||
|                        std::span<std::byte> linear_buffer, | ||||
|                        std::span<std::byte> tiled_buffer) { | ||||
|  | ||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||
|     constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); | ||||
|     static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); | ||||
|  | ||||
|     constexpr u32 tile_size = bytes_per_pixel * 64; | ||||
|     // We could use bytes_per_pixel here but it should be avoided because it | ||||
|     // becomes zero for 4-bit textures! | ||||
|     constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; | ||||
|     const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel; | ||||
|     const u32 end_offset = start_offset + static_cast<u32>(tiled_buffer.size()); | ||||
|  | ||||
|     // This only applies for D24 format, by shifting the span one byte all pixels | ||||
|     // are written properly without byteswap | ||||
|     u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; | ||||
|     // Does this line have any significance? | ||||
|     //u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; | ||||
|     u32 linear_offset = 0; | ||||
|     u32 tiled_offset = 0; | ||||
|  | ||||
|     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||||
|     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||||
|     PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); | ||||
|     const PAddr aligned_down_start_offset = Common::AlignDown(start_offset, tile_size); | ||||
|     const PAddr aligned_start_offset = Common::AlignUp(start_offset, tile_size); | ||||
|     PAddr aligned_end_offset = Common::AlignDown(end_offset, tile_size); | ||||
|  | ||||
|     ASSERT(!morton_to_linear || (aligned_start == start && aligned_end == end)); | ||||
|     ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset)); | ||||
|  | ||||
|     const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; | ||||
|     const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format); | ||||
|     u32 x = (begin_pixel_index % (stride * 8)) / 8; | ||||
|     u32 y = (begin_pixel_index / (stride * 8)) * 8; | ||||
|  | ||||
| @@ -112,29 +199,21 @@ static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffe | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     // If during a texture download the start coordinate is inside a tile, swizzle | ||||
|     // the tile to a temporary buffer and copy the part we are interested in | ||||
|     if (start < aligned_start && !morton_to_linear) { | ||||
|     // If the start offset of a texture download is not tile aligned, swizzle the | ||||
|     // affected tile into a temporary buffer and copy only the part we are interested in | ||||
|     if (start_offset < aligned_start_offset && !morton_to_linear) { | ||||
|         std::array<std::byte, tile_size> tmp_buf; | ||||
|         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||
|         std::memcpy(tiled_buffer.data(), tmp_buf.data() + start - aligned_down_start, | ||||
|                     std::min(aligned_start, end) - start); | ||||
|  | ||||
|         tiled_offset += aligned_start - start; | ||||
|         std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, | ||||
|                     std::min(aligned_start_offset, end_offset) - start_offset); | ||||
|  | ||||
|         tiled_offset += aligned_start_offset - start_offset; | ||||
|         linear_next_tile(); | ||||
|     } | ||||
|  | ||||
|     // Pokemon Super Mystery Dungeon will try to use textures that go beyond | ||||
|     // the end address of VRAM. Clamp the address to the end of VRAM if that happens | ||||
|     // TODO: Move this to the rasterizer cache | ||||
|     if (const u32 clamped_end = VideoCore::g_memory->ClampPhysicalAddress(aligned_start, aligned_end); | ||||
|             clamped_end != aligned_end) { | ||||
|         LOG_ERROR(Render_OpenGL, "Out of bound texture read address {:#x}, clamping to {:#x}", aligned_end, clamped_end); | ||||
|         aligned_end = clamped_end; | ||||
|     } | ||||
|  | ||||
|     const u32 buffer_end = tiled_offset + aligned_end - aligned_start; | ||||
|     const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset; | ||||
|     while (tiled_offset < buffer_end) { | ||||
|         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||
|         auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); | ||||
| @@ -143,15 +222,17 @@ static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffe | ||||
|         linear_next_tile(); | ||||
|     } | ||||
|  | ||||
|     if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { | ||||
|     // If the end offset of a texture download is not tile aligned, swizzle the | ||||
|     // affected tile into a temporary buffer and copy only the part we are interested in | ||||
|     if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) { | ||||
|         std::array<std::byte, tile_size> tmp_buf; | ||||
|         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||
|         std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end - aligned_end); | ||||
|         std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset); | ||||
|     } | ||||
| } | ||||
|  | ||||
| using MortonFunc = void (*)(u32, u32, std::span<std::byte>, std::span<std::byte>, PAddr, PAddr, PAddr); | ||||
| using MortonFunc = void (*)(u32, u32, u32, std::span<std::byte>, std::span<std::byte>); | ||||
|  | ||||
| static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { | ||||
|     MortonCopy<true, PixelFormat::RGBA8>,  // 0 | ||||
| @@ -159,15 +240,15 @@ static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { | ||||
|     MortonCopy<true, PixelFormat::RGB5A1>, // 2 | ||||
|     MortonCopy<true, PixelFormat::RGB565>, // 3 | ||||
|     MortonCopy<true, PixelFormat::RGBA4>,  // 4 | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr, | ||||
|     nullptr,                             // 5 - 13 | ||||
|     MortonCopy<true, PixelFormat::IA8>,  // 5 | ||||
|     MortonCopy<true, PixelFormat::RG8>,  // 6 | ||||
|     MortonCopy<true, PixelFormat::I8>,  // 7 | ||||
|     MortonCopy<true, PixelFormat::A8>,  // 8 | ||||
|     MortonCopy<true, PixelFormat::IA4>,  // 9 | ||||
|     MortonCopy<true, PixelFormat::I4>,  // 10 | ||||
|     MortonCopy<true, PixelFormat::A4>,  // 11 | ||||
|     MortonCopy<true, PixelFormat::ETC1>,  // 12 | ||||
|     MortonCopy<true, PixelFormat::ETC1A4>,  // 13 | ||||
|     MortonCopy<true, PixelFormat::D16>,  // 14 | ||||
|     nullptr,                             // 15 | ||||
|     MortonCopy<true, PixelFormat::D24>,  // 16 | ||||
|   | ||||
| @@ -180,7 +180,7 @@ constexpr u32 GetFormatBpp(PixelFormat format) { | ||||
|     case PixelFormat::ETC1: | ||||
|         return 4; | ||||
|     default: | ||||
|         return 0; | ||||
|         return 1; | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -5,11 +5,13 @@ | ||||
| #pragma once | ||||
| #include <glad/glad.h> | ||||
| #include "common/assert.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/texture/texture_decode.h" | ||||
| #include "video_core/rasterizer_cache/morton_swizzle.h" | ||||
| #include "video_core/rasterizer_cache/surface_params.h" | ||||
| #include "video_core/rasterizer_cache/utils.h" | ||||
| #include "video_core/renderer_opengl/gl_vars.h" | ||||
| #include "video_core/video_core.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| @@ -57,45 +59,18 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | ||||
|     return tex_tuple; | ||||
| } | ||||
|  | ||||
| void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | ||||
| void SwizzleTexture(const SurfaceParams& params, u32 start_offset, | ||||
|                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) { | ||||
|     const u32 func_index = static_cast<u32>(params.pixel_format); | ||||
|     const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index]; | ||||
|  | ||||
|     // TODO: Move memory access out of the morton function | ||||
|     SwizzleImpl(params.stride, params.height, source_linear, dest_tiled, params.addr, flush_start, flush_end); | ||||
|     SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled); | ||||
| } | ||||
|  | ||||
| void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | ||||
| void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, | ||||
|                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) { | ||||
|     // TODO: Integrate this to UNSWIZZLE_TABLE | ||||
|     if (params.type == SurfaceType::Texture) { | ||||
|         Pica::Texture::TextureInfo tex_info{}; | ||||
|         tex_info.width = params.width; | ||||
|         tex_info.height = params.height; | ||||
|         tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(params.pixel_format); | ||||
|         tex_info.SetDefaultStride(); | ||||
|         tex_info.physical_address = params.addr; | ||||
|  | ||||
|         const SurfaceInterval load_interval(load_start, load_end); | ||||
|         const auto rect = params.GetSubRect(params.FromInterval(load_interval)); | ||||
|         DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval); | ||||
|  | ||||
|         const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data()); | ||||
|         for (u32 y = rect.bottom; y < rect.top; y++) { | ||||
|             for (u32 x = rect.left; x < rect.right; x++) { | ||||
|                 auto vec4 = | ||||
|                     Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); | ||||
|                 const std::size_t offset = (x + (params.width * y)) * 4; | ||||
|                 std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     } else { | ||||
|         const u32 func_index = static_cast<u32>(params.pixel_format); | ||||
|         const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; | ||||
|         UnswizzleImpl(params.stride, params.height, dest_linear, source_tiled, params.addr, load_start, load_end); | ||||
|     } | ||||
|     const u32 func_index = static_cast<u32>(params.pixel_format); | ||||
|     const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; | ||||
|     UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled); | ||||
| } | ||||
|  | ||||
| ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) { | ||||
|   | ||||
| @@ -50,22 +50,22 @@ struct TextureCubeConfig { | ||||
|  | ||||
| class SurfaceParams; | ||||
|  | ||||
| void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | ||||
| [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); | ||||
|  | ||||
| void SwizzleTexture(const SurfaceParams& params, u32 start_offset, | ||||
|                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); | ||||
|  | ||||
| /** | ||||
|  * Converts a morton swizzled texture to linear format. | ||||
|  * | ||||
|  * @param params Structure used to query the surface information. | ||||
|  * @param load_start, load_end The address range to unswizzle texture data. | ||||
|  * @param source_tiled The source swizzled data. The span begins at surface base address not load_start. | ||||
|  * @param start_offset Offset from the surface base address at which the source_tiled span begins. | ||||
|  * @param source_tiled The source morton swizzled data. | ||||
|  * @param dest_linear The output buffer where the generated linear data will be written to. | ||||
|  */ | ||||
| void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | ||||
| void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, | ||||
|                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); | ||||
|  | ||||
| [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); | ||||
|  | ||||
| } // namespace OpenGL | ||||
|  | ||||
| namespace std { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user