morton_swizzle: Use tiled_buffer instead of reading data from g_memory

* This is much safer and removes the use of hardcoded global state
This commit is contained in:
emufan4568
2022-09-10 14:57:10 +03:00
committed by GPUCode
parent 3a6d19f51f
commit fa870be263
4 changed files with 57 additions and 47 deletions

View File

@ -26,13 +26,16 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa
void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
DEBUG_ASSERT(load_start >= addr && load_end <= end); DEBUG_ASSERT(load_start >= addr && load_end <= end);
auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); // We start reading from addr instead of load_start, otherwise LookupTexture
if (!source_ptr) { // in UnswizzleTexture will not sample texels correctly
auto source_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
if (!source_ptr) [[unlikely]] {
return; return;
} }
const auto upload_size = std::clamp<std::size_t>(load_end - load_start, 0u, source_ptr.GetSize()); const auto start_offset = load_start - addr;
const auto upload_data = source_ptr.GetBytes(upload_size); const auto texture_data = source_ptr.GetBytes(load_end - addr);
const auto upload_size = texture_data.size() - start_offset;
if (gl_buffer.empty()) { if (gl_buffer.empty()) {
gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
@ -42,15 +45,18 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
if (!is_tiled) { if (!is_tiled) {
ASSERT(type == SurfaceType::Color); ASSERT(type == SurfaceType::Color);
auto upload_data = texture_data.subspan(start_offset, upload_size);
auto dest_buffer = std::span{gl_buffer}.subspan(start_offset, upload_size);
if (pixel_format == PixelFormat::RGBA8 && GLES) { if (pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(upload_data, gl_buffer); Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer);
} else if (pixel_format == PixelFormat::RGB8 && GLES) { } else if (pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer); Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer);
} else { } else {
std::memcpy(gl_buffer.data() + load_start - addr, source_ptr, upload_size); std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
} }
} else { } else {
UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer); UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer);
} }
} }
@ -59,15 +65,14 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
DEBUG_ASSERT(flush_start >= addr && flush_end <= end); DEBUG_ASSERT(flush_start >= addr && flush_end <= end);
auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr); auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
if (!dest_ptr) { if (!dest_ptr) [[unlikely]] {
return; return;
} }
const auto download_size = std::clamp<std::size_t>(flush_end - flush_start, 0u, dest_ptr.GetSize()); const auto start_offset = flush_start - addr;
const auto download_loc = dest_ptr.GetBytes(download_size); const auto end_offset = flush_end - addr;
const auto texture_data = dest_ptr.GetBytes(flush_end - addr);
const u32 start_offset = flush_start - addr; const auto download_size = texture_data.size() - start_offset;
const u32 end_offset = flush_end - addr;
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
@ -88,15 +93,18 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) { } else if (!is_tiled) {
ASSERT(type == SurfaceType::Color); ASSERT(type == SurfaceType::Color);
auto dest_buffer = texture_data.subspan(start_offset, download_size);
auto download_data = std::span{gl_buffer}.subspan(start_offset, download_size);
if (pixel_format == PixelFormat::RGBA8 && GLES) { if (pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_loc); Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data);
} else if (pixel_format == PixelFormat::RGB8 && GLES) { } else if (pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(gl_buffer, download_loc); Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data);
} else { } else {
std::memcpy(download_loc.data() + start_offset, gl_buffer.data() + start_offset, flush_end - flush_start); std::memcpy(dest_buffer.data(), download_data.data(), download_size);
} }
} else { } else {
SwizzleTexture(*this, flush_start, flush_end, gl_buffer, download_loc); SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data);
} }
} }

View File

@ -69,8 +69,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp
} }
template <bool morton_to_linear, PixelFormat format> template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 height, static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer,
std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer,
PAddr base, PAddr start, PAddr end) { PAddr base, PAddr start, PAddr end) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
@ -83,6 +82,7 @@ static void MortonCopy(u32 stride, u32 height,
// This only applies for D24 format, by shifting the span one byte all pixels // This only applies for D24 format, by shifting the span one byte all pixels
// are written properly without byteswap // are written properly without byteswap
u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
u32 tiled_offset = 0;
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
@ -97,7 +97,6 @@ static void MortonCopy(u32 stride, u32 height,
// In OpenGL the texture origin is in the bottom left corner as opposed to other // In OpenGL the texture origin is in the bottom left corner as opposed to other
// APIs that have it at the top left. To avoid flipping texture coordinates in // APIs that have it at the top left. To avoid flipping texture coordinates in
// the shader we read/write the linear buffer backwards // the shader we read/write the linear buffer backwards
//linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel; linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
auto linear_next_tile = [&] { auto linear_next_tile = [&] {
@ -113,24 +112,16 @@ static void MortonCopy(u32 stride, u32 height,
} }
}; };
u8* tile_buffer;
if constexpr (morton_to_linear) {
tile_buffer = (u8*)tiled_buffer.data();
} else {
tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
}
// If during a texture download the start coordinate is inside a tile, swizzle // If during a texture download the start coordinate is inside a tile, swizzle
// the tile to a temporary buffer and copy the part we are interested in // the tile to a temporary buffer and copy the part we are interested in
if (start < aligned_start && !morton_to_linear) { if (start < aligned_start && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf; std::array<std::byte, tile_size> tmp_buf;
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start, std::memcpy(tiled_buffer.data(), tmp_buf.data() + start - aligned_down_start,
std::min(aligned_start, end) - start); std::min(aligned_start, end) - start);
tile_buffer += aligned_start - start; tiled_offset += aligned_start - start;
linear_next_tile(); linear_next_tile();
} }
@ -143,21 +134,20 @@ static void MortonCopy(u32 stride, u32 height,
aligned_end = clamped_end; aligned_end = clamped_end;
} }
const u8* buffer_end = tile_buffer + aligned_end - aligned_start; const u32 buffer_end = tiled_offset + aligned_end - aligned_start;
while (tile_buffer < buffer_end) { while (tiled_offset < buffer_end) {
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
auto tiled_data = std::span<std::byte>{(std::byte*)tile_buffer, tile_size}; auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data); MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data);
tile_buffer += tile_size; tiled_offset += tile_size;
linear_next_tile(); linear_next_tile();
} }
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) { if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
std::array<std::byte, tile_size> tmp_buf; std::array<std::byte, tile_size> tmp_buf;
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset); auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data); MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end); std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end - aligned_end);
} }
} }

View File

@ -77,14 +77,18 @@ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
tex_info.SetDefaultStride(); tex_info.SetDefaultStride();
tex_info.physical_address = params.addr; tex_info.physical_address = params.addr;
const u32 start_pixel = params.PixelsInBytes(load_start - params.addr); const SurfaceInterval load_interval(load_start, load_end);
const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data()); const auto rect = params.GetSubRect(params.FromInterval(load_interval));
for (u32 i = 0; i < params.PixelsInBytes(load_end - load_start); i++) { DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval);
const u32 x = (i + start_pixel) % params.stride;
const u32 y = (i + start_pixel) / params.stride;
auto vec4 = Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info); const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data());
std::memcpy(dest_linear.data() + i * sizeof(u32), vec4.AsArray(), sizeof(u32)); for (u32 y = rect.bottom; y < rect.top; y++) {
for (u32 x = rect.left; x < rect.right; x++) {
auto vec4 =
Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info);
const std::size_t offset = (x + (params.width * y)) * 4;
std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4);
}
} }
} else { } else {

View File

@ -53,6 +53,14 @@ class SurfaceParams;
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end, void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
/**
* Converts a morton swizzled texture to linear format.
*
* @param params Structure used to query the surface information.
* @param load_start, load_end The address range of the texture data to unswizzle.
* @param source_tiled The source swizzled data. The span begins at the surface base address, not at load_start.
* @param dest_linear The output buffer where the generated linear data will be written to.
*/
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end, void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);