From fa870be263e63ff96c7dec71fd0a4ad5e902e844 Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Sat, 10 Sep 2022 14:57:10 +0300 Subject: [PATCH] morton_swizzle: Use tiled_buffer instead of reading data from g_memory * It's much safer and removes hardcoded global state usage --- .../rasterizer_cache/cached_surface.cpp | 44 +++++++++++-------- .../rasterizer_cache/morton_swizzle.h | 34 +++++--------- src/video_core/rasterizer_cache/utils.cpp | 18 +++++--- src/video_core/rasterizer_cache/utils.h | 8 ++++ 4 files changed, 57 insertions(+), 47 deletions(-) diff --git a/src/video_core/rasterizer_cache/cached_surface.cpp b/src/video_core/rasterizer_cache/cached_surface.cpp index 552413758..09aa5aba2 100644 --- a/src/video_core/rasterizer_cache/cached_surface.cpp +++ b/src/video_core/rasterizer_cache/cached_surface.cpp @@ -26,13 +26,16 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { DEBUG_ASSERT(load_start >= addr && load_end <= end); - auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); - if (!source_ptr) { + // We start reading from addr instead of load_start, otherwise LookupTexture + // in UnswizzleTexture will not sample texels correctly + auto source_ptr = VideoCore::g_memory->GetPhysicalRef(addr); + if (!source_ptr) [[unlikely]] { return; } - const auto upload_size = std::clamp(load_end - load_start, 0u, source_ptr.GetSize()); - const auto upload_data = source_ptr.GetBytes(upload_size); + const auto start_offset = load_start - addr; + const auto texture_data = source_ptr.GetBytes(load_end - addr); + const auto upload_size = texture_data.size() - start_offset; if (gl_buffer.empty()) { gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); @@ -42,15 +45,18 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { if (!is_tiled) { ASSERT(type == SurfaceType::Color); + + auto upload_data = texture_data.subspan(start_offset, 
upload_size); + auto dest_buffer = std::span{gl_buffer}.subspan(start_offset, upload_size); if (pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, gl_buffer); + Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); } else if (pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer); + Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); } else { - std::memcpy(gl_buffer.data() + load_start - addr, source_ptr, upload_size); + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); } } else { - UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer); + UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer); } } @@ -59,15 +65,14 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { DEBUG_ASSERT(flush_start >= addr && flush_end <= end); auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr); - if (!dest_ptr) { + if (!dest_ptr) [[unlikely]] { return; } - const auto download_size = std::clamp(flush_end - flush_start, 0u, dest_ptr.GetSize()); - const auto download_loc = dest_ptr.GetBytes(download_size); - - const u32 start_offset = flush_start - addr; - const u32 end_offset = flush_end - addr; + const auto start_offset = flush_start - addr; + const auto end_offset = flush_end - addr; + const auto texture_data = dest_ptr.GetBytes(flush_end - addr); + const auto download_size = texture_data.size() - start_offset; MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); @@ -88,15 +93,18 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { ASSERT(type == SurfaceType::Color); + + auto dest_buffer = texture_data.subspan(start_offset, download_size); + auto download_data = std::span{gl_buffer}.subspan(start_offset, download_size); if (pixel_format == PixelFormat::RGBA8 && GLES) { - 
Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_loc); + Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data); } else if (pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(gl_buffer, download_loc); + Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data); } else { - std::memcpy(download_loc.data() + start_offset, gl_buffer.data() + start_offset, flush_end - flush_start); + std::memcpy(dest_buffer.data(), download_data.data(), download_size); } } else { - SwizzleTexture(*this, flush_start, flush_end, gl_buffer, download_loc); + SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data); } } diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index 138f7c52c..f5800b5d1 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -69,8 +69,7 @@ inline void MortonCopyTile(u32 stride, std::span tile_buffer, std::sp } template -static void MortonCopy(u32 stride, u32 height, - std::span linear_buffer, std::span tiled_buffer, +static void MortonCopy(u32 stride, u32 height, std::span linear_buffer, std::span tiled_buffer, PAddr base, PAddr start, PAddr end) { constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; @@ -83,6 +82,7 @@ static void MortonCopy(u32 stride, u32 height, // This only applies for D24 format, by shifting the span one byte all pixels // are written properly without byteswap u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; + u32 tiled_offset = 0; const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); @@ -97,7 +97,6 @@ static void MortonCopy(u32 stride, u32 height, // In OpenGL the texture origin is in the bottom left corner as opposed to other // APIs that have it at the top left. 
To avoid flipping texture coordinates in // the shader we read/write the linear buffer backwards - //linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; auto linear_next_tile = [&] { @@ -113,24 +112,16 @@ static void MortonCopy(u32 stride, u32 height, } }; - u8* tile_buffer; - if constexpr (morton_to_linear) { - tile_buffer = (u8*)tiled_buffer.data(); - } else { - tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start); - } - // If during a texture download the start coordinate is inside a tile, swizzle // the tile to a temporary buffer and copy the part we are interested in if (start < aligned_start && !morton_to_linear) { std::array tmp_buf; - std::span linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); - + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); MortonCopyTile(stride, tmp_buf, linear_data); - std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start, + std::memcpy(tiled_buffer.data(), tmp_buf.data() + start - aligned_down_start, std::min(aligned_start, end) - start); - tile_buffer += aligned_start - start; + tiled_offset += aligned_start - start; linear_next_tile(); } @@ -143,21 +134,20 @@ static void MortonCopy(u32 stride, u32 height, aligned_end = clamped_end; } - const u8* buffer_end = tile_buffer + aligned_end - aligned_start; - while (tile_buffer < buffer_end) { - std::span linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); - auto tiled_data = std::span{(std::byte*)tile_buffer, tile_size}; - + const u32 buffer_end = tiled_offset + aligned_end - aligned_start; + while (tiled_offset < buffer_end) { + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); + auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); MortonCopyTile(stride, tiled_data, linear_data); - tile_buffer += tile_size; + tiled_offset += tile_size; linear_next_tile(); } if 
(end > std::max(aligned_start, aligned_end) && !morton_to_linear) { std::array tmp_buf; - std::span linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); MortonCopyTile(stride, tmp_buf, linear_data); - std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end); + std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end - aligned_end); } } diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index 40b2137c3..d2ab0b434 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -77,14 +77,18 @@ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, tex_info.SetDefaultStride(); tex_info.physical_address = params.addr; - const u32 start_pixel = params.PixelsInBytes(load_start - params.addr); - const u8* source_data = reinterpret_cast(source_tiled.data()); - for (u32 i = 0; i < params.PixelsInBytes(load_end - load_start); i++) { - const u32 x = (i + start_pixel) % params.stride; - const u32 y = (i + start_pixel) / params.stride; + const SurfaceInterval load_interval(load_start, load_end); + const auto rect = params.GetSubRect(params.FromInterval(load_interval)); + DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval); - auto vec4 = Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); - std::memcpy(dest_linear.data() + i * sizeof(u32), vec4.AsArray(), sizeof(u32)); + const u8* source_data = reinterpret_cast(source_tiled.data()); + for (u32 y = rect.bottom; y < rect.top; y++) { + for (u32 x = rect.left; x < rect.right; x++) { + auto vec4 = + Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); + const std::size_t offset = (x + (params.width * y)) * 4; + std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4); + } } } else { diff --git 
a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index 660655c04..d90dfba24 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -53,6 +53,14 @@ class SurfaceParams; void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, std::span source_linear, std::span dest_tiled); +/** + * Converts a Morton-swizzled texture to linear format. + * + * @param params Structure used to query the surface information. + * @param load_start, load_end The address range of the texture data to unswizzle. + * @param source_tiled The source swizzled data. The span begins at the surface base address, not at load_start. + * @param dest_linear The output buffer that the generated linear data will be written to. + */ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, std::span source_tiled, std::span dest_linear);