morton_swizzle: Use tiled_buffer instead of reading data from g_memory
* It's much safer and removes hardcoded global state usage
This commit is contained in:
		| @@ -27,13 +27,16 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa | |||||||
| void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||||||
|     DEBUG_ASSERT(load_start >= addr && load_end <= end); |     DEBUG_ASSERT(load_start >= addr && load_end <= end); | ||||||
|  |  | ||||||
|     auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); |     // We start reading from addr instead of load_start, otherwise LookupTexture | ||||||
|     if (!source_ptr) { |     // in UnswizzleTexture will not sample texels correctly | ||||||
|  |     auto source_ptr = VideoCore::g_memory->GetPhysicalRef(addr); | ||||||
|  |     if (!source_ptr) [[unlikely]] { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const auto upload_size = std::clamp<std::size_t>(load_end - load_start, 0u, source_ptr.GetSize()); |     const auto start_offset = load_start - addr; | ||||||
|     const auto upload_data = source_ptr.GetBytes(upload_size); |     const auto texture_data = source_ptr.GetBytes(load_end - addr); | ||||||
|  |     const auto upload_size = texture_data.size() - start_offset; | ||||||
|  |  | ||||||
|     if (gl_buffer.empty()) { |     if (gl_buffer.empty()) { | ||||||
|         gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); |         gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); | ||||||
| @@ -43,15 +46,18 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | |||||||
|  |  | ||||||
|     if (!is_tiled) { |     if (!is_tiled) { | ||||||
|         ASSERT(type == SurfaceType::Color); |         ASSERT(type == SurfaceType::Color); | ||||||
|  |  | ||||||
|  |         auto upload_data = texture_data.subspan(start_offset, upload_size); | ||||||
|  |         auto dest_buffer = std::span{gl_buffer}.subspan(start_offset, upload_size); | ||||||
|         if (pixel_format == PixelFormat::RGBA8 && GLES) { |         if (pixel_format == PixelFormat::RGBA8 && GLES) { | ||||||
|             Pica::Texture::ConvertABGRToRGBA(upload_data, gl_buffer); |             Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); | ||||||
|         } else if (pixel_format == PixelFormat::RGB8 && GLES) { |         } else if (pixel_format == PixelFormat::RGB8 && GLES) { | ||||||
|             Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer); |             Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); | ||||||
|         } else { |         } else { | ||||||
|             std::memcpy(gl_buffer.data() + load_start - addr, source_ptr, upload_size); |             std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer); |         UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -61,15 +67,14 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | |||||||
|     DEBUG_ASSERT(flush_start >= addr && flush_end <= end); |     DEBUG_ASSERT(flush_start >= addr && flush_end <= end); | ||||||
|  |  | ||||||
|     auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr); |     auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr); | ||||||
|     if (!dest_ptr) { |     if (!dest_ptr) [[unlikely]] { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const auto download_size = std::clamp<std::size_t>(flush_end - flush_start, 0u, dest_ptr.GetSize()); |     const auto start_offset = flush_start - addr; | ||||||
|     const auto download_loc = dest_ptr.GetBytes(download_size); |     const auto end_offset = flush_end - addr; | ||||||
|  |     const auto texture_data = dest_ptr.GetBytes(flush_end - addr); | ||||||
|     const u32 start_offset = flush_start - addr; |     const auto download_size = texture_data.size() - start_offset; | ||||||
|     const u32 end_offset = flush_end - addr; |  | ||||||
|  |  | ||||||
|     MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); |     MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); | ||||||
|  |  | ||||||
| @@ -90,15 +95,18 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | |||||||
|             std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); |             std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); | ||||||
|     } else if (!is_tiled) { |     } else if (!is_tiled) { | ||||||
|         ASSERT(type == SurfaceType::Color); |         ASSERT(type == SurfaceType::Color); | ||||||
|  |  | ||||||
|  |         auto dest_buffer = texture_data.subspan(start_offset, download_size); | ||||||
|  |         auto download_data = std::span{gl_buffer}.subspan(start_offset, download_size); | ||||||
|         if (pixel_format == PixelFormat::RGBA8 && GLES) { |         if (pixel_format == PixelFormat::RGBA8 && GLES) { | ||||||
|             Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_loc); |             Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data); | ||||||
|         } else if (pixel_format == PixelFormat::RGB8 && GLES) { |         } else if (pixel_format == PixelFormat::RGB8 && GLES) { | ||||||
|             Pica::Texture::ConvertBGRToRGB(gl_buffer, download_loc); |             Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data); | ||||||
|         } else { |         } else { | ||||||
|             std::memcpy(download_loc.data() + start_offset, gl_buffer.data() + start_offset, flush_end - flush_start); |             std::memcpy(dest_buffer.data(), download_data.data(), download_size); | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         SwizzleTexture(*this, flush_start, flush_end, gl_buffer, download_loc); |         SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -69,8 +69,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp | |||||||
| } | } | ||||||
|  |  | ||||||
| template <bool morton_to_linear, PixelFormat format> | template <bool morton_to_linear, PixelFormat format> | ||||||
| static void MortonCopy(u32 stride, u32 height, | static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer, | ||||||
|                        std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer, |  | ||||||
|                        PAddr base, PAddr start, PAddr end) { |                        PAddr base, PAddr start, PAddr end) { | ||||||
|  |  | ||||||
|     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; |     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; | ||||||
| @@ -83,6 +82,7 @@ static void MortonCopy(u32 stride, u32 height, | |||||||
|     // This only applies to the D24 format: by shifting the span by one byte, all pixels |     // This only applies to the D24 format: by shifting the span by one byte, all pixels | ||||||
|     // are written properly without a byteswap |     // are written properly without a byteswap | ||||||
|     u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; |     u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; | ||||||
|  |     u32 tiled_offset = 0; | ||||||
|  |  | ||||||
|     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); |     const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||||||
|     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); |     const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||||||
| @@ -97,7 +97,6 @@ static void MortonCopy(u32 stride, u32 height, | |||||||
|     // In OpenGL the texture origin is in the bottom left corner as opposed to other |     // In OpenGL the texture origin is in the bottom left corner as opposed to other | ||||||
|     // APIs that have it at the top left. To avoid flipping texture coordinates in |     // APIs that have it at the top left. To avoid flipping texture coordinates in | ||||||
|     // the shader we read/write the linear buffer backwards |     // the shader we read/write the linear buffer backwards | ||||||
|     //linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; |  | ||||||
|     linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; |     linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; | ||||||
|  |  | ||||||
|     auto linear_next_tile = [&] { |     auto linear_next_tile = [&] { | ||||||
| @@ -113,24 +112,16 @@ static void MortonCopy(u32 stride, u32 height, | |||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     u8* tile_buffer; |  | ||||||
|     if constexpr (morton_to_linear) { |  | ||||||
|         tile_buffer = (u8*)tiled_buffer.data(); |  | ||||||
|     } else { |  | ||||||
|         tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // If during a texture download the start coordinate is inside a tile, swizzle |     // If during a texture download the start coordinate is inside a tile, swizzle | ||||||
|     // the tile to a temporary buffer and copy the part we are interested in |     // the tile to a temporary buffer and copy the part we are interested in | ||||||
|     if (start < aligned_start && !morton_to_linear) { |     if (start < aligned_start && !morton_to_linear) { | ||||||
|         std::array<std::byte, tile_size> tmp_buf; |         std::array<std::byte, tile_size> tmp_buf; | ||||||
|         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); |         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||||
|  |  | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); |         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||||
|         std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start, |         std::memcpy(tiled_buffer.data(), tmp_buf.data() + start - aligned_down_start, | ||||||
|                     std::min(aligned_start, end) - start); |                     std::min(aligned_start, end) - start); | ||||||
|  |  | ||||||
|         tile_buffer += aligned_start - start; |         tiled_offset += aligned_start - start; | ||||||
|         linear_next_tile(); |         linear_next_tile(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -143,21 +134,20 @@ static void MortonCopy(u32 stride, u32 height, | |||||||
|         aligned_end = clamped_end; |         aligned_end = clamped_end; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const u8* buffer_end = tile_buffer + aligned_end - aligned_start; |     const u32 buffer_end = tiled_offset + aligned_end - aligned_start; | ||||||
|     while (tile_buffer < buffer_end) { |     while (tiled_offset < buffer_end) { | ||||||
|         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); |         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||||
|         auto tiled_data = std::span<std::byte>{(std::byte*)tile_buffer, tile_size}; |         auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); | ||||||
|  |  | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data); |         MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data); | ||||||
|         tile_buffer += tile_size; |         tiled_offset += tile_size; | ||||||
|         linear_next_tile(); |         linear_next_tile(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { |     if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { | ||||||
|         std::array<std::byte, tile_size> tmp_buf; |         std::array<std::byte, tile_size> tmp_buf; | ||||||
|         std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); |         auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); | ||||||
|         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); |         MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); | ||||||
|         std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end); |         std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end - aligned_end); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -77,14 +77,18 @@ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | |||||||
|         tex_info.SetDefaultStride(); |         tex_info.SetDefaultStride(); | ||||||
|         tex_info.physical_address = params.addr; |         tex_info.physical_address = params.addr; | ||||||
|  |  | ||||||
|         const u32 start_pixel = params.PixelsInBytes(load_start - params.addr); |         const SurfaceInterval load_interval(load_start, load_end); | ||||||
|         const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data()); |         const auto rect = params.GetSubRect(params.FromInterval(load_interval)); | ||||||
|         for (u32 i = 0; i < params.PixelsInBytes(load_end - load_start); i++) { |         DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval); | ||||||
|             const u32 x = (i + start_pixel) % params.stride; |  | ||||||
|             const u32 y = (i + start_pixel) / params.stride; |  | ||||||
|  |  | ||||||
|             auto vec4 = Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); |         const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data()); | ||||||
|             std::memcpy(dest_linear.data() + i * sizeof(u32), vec4.AsArray(), sizeof(u32)); |         for (u32 y = rect.bottom; y < rect.top; y++) { | ||||||
|  |             for (u32 x = rect.left; x < rect.right; x++) { | ||||||
|  |                 auto vec4 = | ||||||
|  |                     Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); | ||||||
|  |                 const std::size_t offset = (x + (params.width * y)) * 4; | ||||||
|  |                 std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4); | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     } else { |     } else { | ||||||
|   | |||||||
| @@ -53,6 +53,14 @@ class SurfaceParams; | |||||||
| void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, | ||||||
|                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); |                     std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Converts a morton swizzled texture to linear format. | ||||||
|  |  * | ||||||
|  |  * @param params Structure used to query the surface information. | ||||||
|  |  * @param load_start, load_end The address range to unswizzle texture data. | ||||||
|  |  * @param source_tiled The source swizzled data. The span begins at the surface base address, not at load_start. | ||||||
|  |  * @param dest_linear The output buffer where the generated linear data will be written to. | ||||||
|  |  */ | ||||||
| void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, | ||||||
|                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); |                       std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user