morton_swizzle: Optimize and use std::span
This commit is contained in:
		| @@ -35,19 +35,11 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | |||||||
|     const bool need_swap = |     const bool need_swap = | ||||||
|         GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); |         GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); | ||||||
|  |  | ||||||
|     const u8* texture_ptr = VideoCore::g_memory->GetPhysicalPointer(addr); |     u8* texture_ptr = VideoCore::g_memory->GetPhysicalPointer(addr); | ||||||
|     if (texture_ptr == nullptr) { |     if (texture_ptr == nullptr) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const u32 byte_size = width * height * GetBytesPerPixel(pixel_format); |  | ||||||
|     const auto texture_data = std::span<const std::byte>{reinterpret_cast<const std::byte*>(texture_ptr), |  | ||||||
|                                                          byte_size}; |  | ||||||
|  |  | ||||||
|     if (gl_buffer.empty()) { |  | ||||||
|         gl_buffer.resize(byte_size); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // TODO: Should probably be done in ::Memory:: and check for other regions too |     // TODO: Should probably be done in ::Memory:: and check for other regions too | ||||||
|     if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) |     if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) | ||||||
|         load_end = Memory::VRAM_VADDR_END; |         load_end = Memory::VRAM_VADDR_END; | ||||||
| @@ -55,10 +47,16 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | |||||||
|     if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) |     if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) | ||||||
|         load_start = Memory::VRAM_VADDR; |         load_start = Memory::VRAM_VADDR; | ||||||
|  |  | ||||||
|     MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); |  | ||||||
|  |  | ||||||
|     ASSERT(load_start >= addr && load_end <= end); |     ASSERT(load_start >= addr && load_end <= end); | ||||||
|  |  | ||||||
|     const u32 start_offset = load_start - addr; |     const u32 start_offset = load_start - addr; | ||||||
|  |     const u32 byte_size = width * height * GetBytesPerPixel(pixel_format); | ||||||
|  |  | ||||||
|  |     if (gl_buffer.empty()) { | ||||||
|  |         gl_buffer.resize(byte_size); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); | ||||||
|  |  | ||||||
|     if (!is_tiled) { |     if (!is_tiled) { | ||||||
|         ASSERT(type == SurfaceType::Color); |         ASSERT(type == SurfaceType::Color); | ||||||
| @@ -67,33 +65,31 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | |||||||
|             // cannot fully test this |             // cannot fully test this | ||||||
|             if (pixel_format == PixelFormat::RGBA8) { |             if (pixel_format == PixelFormat::RGBA8) { | ||||||
|                 for (std::size_t i = start_offset; i < load_end - addr; i += 4) { |                 for (std::size_t i = start_offset; i < load_end - addr; i += 4) { | ||||||
|                     gl_buffer[i] = texture_ptr[i + 3]; |                     gl_buffer[i] = (std::byte)texture_ptr[i + 3]; | ||||||
|                     gl_buffer[i + 1] = texture_ptr[i + 2]; |                     gl_buffer[i + 1] = (std::byte)texture_ptr[i + 2]; | ||||||
|                     gl_buffer[i + 2] = texture_ptr[i + 1]; |                     gl_buffer[i + 2] = (std::byte)texture_ptr[i + 1]; | ||||||
|                     gl_buffer[i + 3] = texture_ptr[i]; |                     gl_buffer[i + 3] = (std::byte)texture_ptr[i]; | ||||||
|                 } |                 } | ||||||
|             } else if (pixel_format == PixelFormat::RGB8) { |             } else if (pixel_format == PixelFormat::RGB8) { | ||||||
|                 for (std::size_t i = start_offset; i < load_end - addr; i += 3) { |                 for (std::size_t i = start_offset; i < load_end - addr; i += 3) { | ||||||
|                     gl_buffer[i] = texture_ptr[i + 2]; |                     gl_buffer[i] = (std::byte)texture_ptr[i + 2]; | ||||||
|                     gl_buffer[i + 1] = texture_ptr[i + 1]; |                     gl_buffer[i + 1] = (std::byte)texture_ptr[i + 1]; | ||||||
|                     gl_buffer[i + 2] = texture_ptr[i]; |                     gl_buffer[i + 2] = (std::byte)texture_ptr[i]; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             std::memcpy(&gl_buffer[start_offset], texture_ptr + start_offset, |             std::memcpy(gl_buffer.data() + start_offset, texture_ptr + start_offset, load_end - load_start); | ||||||
|                         load_end - load_start); |  | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         const auto dest_data = std::span<std::byte>{reinterpret_cast<std::byte*>(gl_buffer.data()), |         std::span<std::byte> texture_data{(std::byte*)texture_ptr, byte_size}; | ||||||
|                                                     byte_size}; |         UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer); | ||||||
|         UnswizzleTexture(*this, load_start, load_end, texture_data, dest_data); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", | MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", | ||||||
|                     MP_RGB(128, 192, 64)); |                     MP_RGB(128, 192, 64)); | ||||||
| void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||||||
|     u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); |     u8* dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); | ||||||
|     if (dst_buffer == nullptr) { |     if (dst_buffer == nullptr) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| @@ -134,25 +130,24 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | |||||||
|         ASSERT(type == SurfaceType::Color); |         ASSERT(type == SurfaceType::Color); | ||||||
|         if (pixel_format == PixelFormat::RGBA8 && GLES) { |         if (pixel_format == PixelFormat::RGBA8 && GLES) { | ||||||
|             for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { |             for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { | ||||||
|                 dst_buffer[i] = gl_buffer[i + 3]; |                 dst_buffer[i] = (u8)gl_buffer[i + 3]; | ||||||
|                 dst_buffer[i + 1] = gl_buffer[i + 2]; |                 dst_buffer[i + 1] = (u8)gl_buffer[i + 2]; | ||||||
|                 dst_buffer[i + 2] = gl_buffer[i + 1]; |                 dst_buffer[i + 2] = (u8)gl_buffer[i + 1]; | ||||||
|                 dst_buffer[i + 3] = gl_buffer[i]; |                 dst_buffer[i + 3] = (u8)gl_buffer[i]; | ||||||
|             } |             } | ||||||
|         } else if (pixel_format == PixelFormat::RGB8 && GLES) { |         } else if (pixel_format == PixelFormat::RGB8 && GLES) { | ||||||
|             for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { |             for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { | ||||||
|                 dst_buffer[i] = gl_buffer[i + 2]; |                 dst_buffer[i] = (u8)gl_buffer[i + 2]; | ||||||
|                 dst_buffer[i + 1] = gl_buffer[i + 1]; |                 dst_buffer[i + 1] = (u8)gl_buffer[i + 1]; | ||||||
|                 dst_buffer[i + 2] = gl_buffer[i]; |                 dst_buffer[i + 2] = (u8)gl_buffer[i]; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], |             std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], | ||||||
|                         flush_end - flush_start); |                         flush_end - flush_start); | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         const auto source_data = std::span<std::byte>{reinterpret_cast<std::byte*>(gl_buffer.data()), |         std::span<std::byte> texture_data{(std::byte*)dst_buffer + start_offset, byte_size}; | ||||||
|                                                       byte_size}; |         SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data); | ||||||
|         SwizzleTexture(*this, flush_start, flush_end, source_data, {}); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -425,7 +420,7 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) { | |||||||
|             .region = rect |             .region = rect | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         runtime.ReadTexture(texture, subresource, tuple, gl_buffer.data()); |         runtime.ReadTexture(texture, subresource, tuple, (u8*)gl_buffer.data()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     glPixelStorei(GL_PACK_ROW_LENGTH, 0); |     glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||||||
|   | |||||||
| @@ -104,7 +104,7 @@ public: | |||||||
| public: | public: | ||||||
|     bool registered = false; |     bool registered = false; | ||||||
|     SurfaceRegions invalid_regions; |     SurfaceRegions invalid_regions; | ||||||
|     std::vector<u8> gl_buffer; |     std::vector<std::byte> gl_buffer; | ||||||
|  |  | ||||||
|     // Number of bytes to read from fill_data |     // Number of bytes to read from fill_data | ||||||
|     u32 fill_size = 0; |     u32 fill_size = 0; | ||||||
|   | |||||||
| @@ -3,6 +3,8 @@ | |||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  | #include <span> | ||||||
|  | #include <bit> | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/rasterizer_cache/pixel_format.h" | #include "video_core/rasterizer_cache/pixel_format.h" | ||||||
| @@ -12,50 +14,54 @@ | |||||||
|  |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
|  |  | ||||||
|  | inline u32 MakeInt(std::span<std::byte> bytes) { | ||||||
|  |     u32 integer{}; | ||||||
|  |     std::memcpy(&integer, bytes.data(), sizeof(u32)); | ||||||
|  |  | ||||||
|  |     return integer; | ||||||
|  | } | ||||||
|  |  | ||||||
| template <bool morton_to_linear, PixelFormat format> | template <bool morton_to_linear, PixelFormat format> | ||||||
| static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* linear_buffer) { | inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::span<std::byte> linear_buffer) { | ||||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; |     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||||
|     constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); |     constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format); | ||||||
|  |  | ||||||
|     for (u32 y = 0; y < 8; y++) { |     for (u32 y = 0; y < 8; y++) { | ||||||
|         for (u32 x = 0; x < 8; x++) { |         for (u32 x = 0; x < 8; x++) { | ||||||
|             u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; |             const u32 tile_offset = VideoCore::MortonInterleave(x, y) * bytes_per_pixel; | ||||||
|             u8* linear_ptr = linear_buffer + ((7 - y) * stride + x) * aligned_bytes_per_pixel; |             const u32 linear_offset = ((7 - y) * stride + x) * linear_bytes_per_pixel; | ||||||
|  |             auto tile_pixel = tile_buffer.subspan(tile_offset, bytes_per_pixel); | ||||||
|  |             auto linear_pixel = linear_buffer.subspan(linear_offset, linear_bytes_per_pixel); | ||||||
|  |  | ||||||
|             if constexpr (morton_to_linear) { |             if constexpr (morton_to_linear) { | ||||||
|                 if constexpr (format == PixelFormat::D24S8) { |                 if constexpr (format == PixelFormat::D24S8) { | ||||||
|                     linear_ptr[0] = tile_ptr[3]; |                     const u32 s8d24 = MakeInt(tile_pixel); | ||||||
|                     std::memcpy(linear_ptr + 1, tile_ptr, 3); |                     const u32 d24s8 = std::rotl(s8d24, 8); | ||||||
|  |                     std::memcpy(linear_pixel.data(), &d24s8, sizeof(u32)); | ||||||
|                 } else if (format == PixelFormat::RGBA8 && GLES) { |                 } else if (format == PixelFormat::RGBA8 && GLES) { | ||||||
|                     // because GLES does not have ABGR format |                     const u32 abgr = MakeInt(tile_pixel); | ||||||
|                     // so we will do byteswapping here |                     const u32 rgba = std::byteswap(abgr); | ||||||
|                     linear_ptr[0] = tile_ptr[3]; |                     std::memcpy(linear_pixel.data(), &rgba, sizeof(u32)); | ||||||
|                     linear_ptr[1] = tile_ptr[2]; |  | ||||||
|                     linear_ptr[2] = tile_ptr[1]; |  | ||||||
|                     linear_ptr[3] = tile_ptr[0]; |  | ||||||
|                 } else if (format == PixelFormat::RGB8 && GLES) { |                 } else if (format == PixelFormat::RGB8 && GLES) { | ||||||
|                     linear_ptr[0] = tile_ptr[2]; |                     std::memcpy(linear_pixel.data(), tile_pixel.data(), 3); | ||||||
|                     linear_ptr[1] = tile_ptr[1]; |                     std::swap(linear_pixel[0], linear_pixel[2]); | ||||||
|                     linear_ptr[2] = tile_ptr[0]; |  | ||||||
|                 } else { |                 } else { | ||||||
|                     std::memcpy(linear_ptr, tile_ptr, bytes_per_pixel); |                     std::memcpy(linear_pixel.data(), tile_pixel.data(), bytes_per_pixel); | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 if constexpr (format == PixelFormat::D24S8) { |                 if constexpr (format == PixelFormat::D24S8) { | ||||||
|                     std::memcpy(tile_ptr, linear_ptr + 1, 3); |                     const u32 d24s8 = MakeInt(linear_pixel); | ||||||
|                     tile_ptr[3] = linear_ptr[0]; |                     const u32 s8d24 = std::rotr(d24s8, 8); | ||||||
|  |                     std::memcpy(tile_pixel.data(), &s8d24, sizeof(u32)); | ||||||
|                 } else if (format == PixelFormat::RGBA8 && GLES) { |                 } else if (format == PixelFormat::RGBA8 && GLES) { | ||||||
|                     // because GLES does not have ABGR format |                     const u32 rgba = MakeInt(linear_pixel); | ||||||
|                     // so we will do byteswapping here |                     const u32 abgr = std::byteswap(rgba); | ||||||
|                     tile_ptr[0] = linear_ptr[3]; |                     std::memcpy(tile_pixel.data(), &abgr, sizeof(u32)); | ||||||
|                     tile_ptr[1] = linear_ptr[2]; |  | ||||||
|                     tile_ptr[2] = linear_ptr[1]; |  | ||||||
|                     tile_ptr[3] = linear_ptr[0]; |  | ||||||
|                 } else if (format == PixelFormat::RGB8 && GLES) { |                 } else if (format == PixelFormat::RGB8 && GLES) { | ||||||
|                     tile_ptr[0] = linear_ptr[2]; |                     std::memcpy(tile_pixel.data(), linear_pixel.data(), 3); | ||||||
|                     tile_ptr[1] = linear_ptr[1]; |                     std::swap(tile_pixel[0], tile_pixel[2]); | ||||||
|                     tile_ptr[2] = linear_ptr[0]; |  | ||||||
|                 } else { |                 } else { | ||||||
|                     std::memcpy(tile_ptr, linear_ptr, bytes_per_pixel); |                     std::memcpy(tile_pixel.data(), linear_pixel.data(), bytes_per_pixel); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @@ -63,13 +69,20 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* linear_buffer) { | |||||||
| } | } | ||||||
|  |  | ||||||
| template <bool morton_to_linear, PixelFormat format> | template <bool morton_to_linear, PixelFormat format> | ||||||
| static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PAddr start, PAddr end) { | static void MortonCopy(u32 stride, u32 height, | ||||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; |                        std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer, | ||||||
|     constexpr u32 tile_size = bytes_per_pixel * 64; |                        PAddr base, PAddr start, PAddr end) { | ||||||
|  |  | ||||||
|  |     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||||
|     constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); |     constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); | ||||||
|     static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); |     static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); | ||||||
|     linear_buffer += aligned_bytes_per_pixel - bytes_per_pixel; |  | ||||||
|  |     constexpr u32 tile_size = bytes_per_pixel * 64; | ||||||
|  |     const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel; | ||||||
|  |  | ||||||
|  |     // This only applies for D24 format, by shifting the span one byte all pixels | ||||||
|  |     // are written properly without byteswap | ||||||
|  |     u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; | ||||||
|  |  | ||||||
|     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); |     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||||||
|     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); |     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||||||
| @@ -84,18 +97,19 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA | |||||||
|     // In OpenGL the texture origin is in the bottom left corner as opposed to other |     // In OpenGL the texture origin is in the bottom left corner as opposed to other | ||||||
|     // APIs that have it at the top left. To avoid flipping texture coordinates in |     // APIs that have it at the top left. To avoid flipping texture coordinates in | ||||||
|     // the shader we read/write the linear buffer backwards |     // the shader we read/write the linear buffer backwards | ||||||
|     linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; |     //linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; | ||||||
|  |     linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; | ||||||
|  |  | ||||||
|     auto linear_next_tile = [&] { |     auto linear_next_tile = [&] { | ||||||
|         x = (x + 8) % stride; |         x = (x + 8) % stride; | ||||||
|         linear_buffer += 8 * aligned_bytes_per_pixel; |         linear_offset += 8 * aligned_bytes_per_pixel; | ||||||
|         if (!x) { |         if (!x) { | ||||||
|             y  = (y + 8) % height; |             y  = (y + 8) % height; | ||||||
|             if (!y) { |             if (!y) { | ||||||
|                 return; |                 return; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             linear_buffer -= stride * 9 * aligned_bytes_per_pixel; |             linear_offset -= stride * 9 * aligned_bytes_per_pixel; | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
| @@ -104,8 +118,10 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA | |||||||
|     // If during a texture download the start coordinate is inside a tile, swizzle |     // If during a texture download the start coordinate is inside a tile, swizzle | ||||||
|     // the tile to a temporary buffer and copy the part we are interested in |     // the tile to a temporary buffer and copy the part we are interested in | ||||||
|     if (start < aligned_start && !morton_to_linear) { |     if (start < aligned_start && !morton_to_linear) { | ||||||
|         std::array<u8, tile_size> tmp_buf; |         std::array<std::byte, tile_size> tmp_buf; | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer); |         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); | ||||||
|  |  | ||||||
|  |         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||||
|         std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start, |         std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start, | ||||||
|                     std::min(aligned_start, end) - start); |                     std::min(aligned_start, end) - start); | ||||||
|  |  | ||||||
| @@ -124,19 +140,23 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA | |||||||
|  |  | ||||||
|     const u8* buffer_end = tile_buffer + aligned_end - aligned_start; |     const u8* buffer_end = tile_buffer + aligned_end - aligned_start; | ||||||
|     while (tile_buffer < buffer_end) { |     while (tile_buffer < buffer_end) { | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tile_buffer, linear_buffer); |         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); | ||||||
|  |         auto tiled_data = std::span<std::byte>{(std::byte*)tile_buffer, tile_size}; | ||||||
|  |  | ||||||
|  |         MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data); | ||||||
|         tile_buffer += tile_size; |         tile_buffer += tile_size; | ||||||
|         linear_next_tile(); |         linear_next_tile(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { |     if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { | ||||||
|         std::array<u8, tile_size> tmp_buf; |         std::array<std::byte, tile_size> tmp_buf; | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer); |         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); | ||||||
|  |         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||||
|         std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end); |         std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| using MortonFunc = void (*)(u32, u32, u8*, PAddr, PAddr, PAddr); | using MortonFunc = void (*)(u32, u32, std::span<std::byte>, std::span<std::byte>, PAddr, PAddr, PAddr); | ||||||
|  |  | ||||||
| static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { | static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { | ||||||
|     MortonCopy<true, PixelFormat::RGBA8>,  // 0 |     MortonCopy<true, PixelFormat::RGBA8>,  // 0 | ||||||
|   | |||||||
| @@ -58,17 +58,16 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | ||||||
|                     std::span<std::byte> source, std::span<std::byte> dest) { |                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) { | ||||||
|     const u32 func_index = static_cast<u32>(params.pixel_format); |     const u32 func_index = static_cast<u32>(params.pixel_format); | ||||||
|     const MortonFunc swizzle = SWIZZLE_TABLE[func_index]; |     const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index]; | ||||||
|     u8* source_data = reinterpret_cast<u8*>(source.data()); |  | ||||||
|  |  | ||||||
|     // TODO: Move memory access out of the morton function |     // TODO: Move memory access out of the morton function | ||||||
|     swizzle(params.stride, params.height, source_data, params.addr, flush_start, flush_end); |     SwizzleImpl(params.stride, params.height, source_linear, dest_tiled, params.addr, flush_start, flush_end); | ||||||
| } | } | ||||||
|  |  | ||||||
| void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | ||||||
|                       std::span<const std::byte> source, std::span<std::byte> dest) { |                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) { | ||||||
|     // TODO: Integrate this to UNSWIZZLE_TABLE |     // TODO: Integrate this to UNSWIZZLE_TABLE | ||||||
|     if (params.type == SurfaceType::Texture) { |     if (params.type == SurfaceType::Texture) { | ||||||
|         Pica::Texture::TextureInfo tex_info{}; |         Pica::Texture::TextureInfo tex_info{}; | ||||||
| @@ -82,21 +81,19 @@ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | |||||||
|         const auto rect = params.GetSubRect(params.FromInterval(load_interval)); |         const auto rect = params.GetSubRect(params.FromInterval(load_interval)); | ||||||
|         DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval); |         DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval); | ||||||
|  |  | ||||||
|         const u8* source_data = reinterpret_cast<const u8*>(source.data()); |         const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data()); | ||||||
|         for (u32 y = rect.bottom; y < rect.top; y++) { |         for (u32 y = rect.bottom; y < rect.top; y++) { | ||||||
|             for (u32 x = rect.left; x < rect.right; x++) { |             for (u32 x = rect.left; x < rect.right; x++) { | ||||||
|                 auto vec4 = |                 auto vec4 = | ||||||
|                     Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); |                     Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); | ||||||
|                 const std::size_t offset = (x + (params.width * y)) * 4; |                 const std::size_t offset = (x + (params.width * y)) * 4; | ||||||
|                 std::memcpy(dest.data() + offset, vec4.AsArray(), 4); |                 std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         const u32 func_index = static_cast<u32>(params.pixel_format); |         const u32 func_index = static_cast<u32>(params.pixel_format); | ||||||
|         const MortonFunc deswizzle = UNSWIZZLE_TABLE[func_index]; |         const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; | ||||||
|         u8* dest_data = reinterpret_cast<u8*>(dest.data()); |         UnswizzleImpl(params.stride, params.height, dest_linear, source_tiled, params.addr, load_start, load_end); | ||||||
|  |  | ||||||
|         deswizzle(params.stride, params.height, dest_data, params.addr, load_start, load_end); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -51,10 +51,10 @@ struct TextureCubeConfig { | |||||||
| class SurfaceParams; | class SurfaceParams; | ||||||
|  |  | ||||||
| void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | ||||||
|                     std::span<std::byte> source, std::span<std::byte> dest); |                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); | ||||||
|  |  | ||||||
| void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | ||||||
|                       std::span<const std::byte> source, std::span<std::byte> dest); |                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); | ||||||
|  |  | ||||||
| [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); | [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user