rasterizer_cache: Explicitly pass end_offset to swizzle functions

* This addresses overflow issues
This commit is contained in:
GPUCode
2022-09-25 16:50:56 +03:00
parent 9e8c403793
commit 8e1a23d971
5 changed files with 18 additions and 20 deletions

View File

@ -136,7 +136,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp
} }
template <bool morton_to_linear, PixelFormat format> template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 height, u32 start_offset, static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
std::span<std::byte> linear_buffer, std::span<std::byte> linear_buffer,
std::span<std::byte> tiled_buffer) { std::span<std::byte> tiled_buffer) {
@ -148,7 +148,6 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
// becomes zero for 4-bit textures! // becomes zero for 4-bit textures!
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel; const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
const u32 end_offset = start_offset + static_cast<u32>(tiled_buffer.size());
// Does this line have any significance? // Does this line have any significance?
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; //u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
@ -216,7 +215,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
} }
} }
using MortonFunc = void (*)(u32, u32, u32, std::span<std::byte>, std::span<std::byte>); using MortonFunc = void (*)(u32, u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
MortonCopy<true, PixelFormat::RGBA8>, // 0 MortonCopy<true, PixelFormat::RGBA8>, // 0

View File

@ -894,20 +894,20 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
ASSERT(load_start >= surface->addr && load_end <= surface->end); ASSERT(load_start >= surface->addr && load_end <= surface->end);
const auto& staging = runtime.FindStaging( const auto& staging = runtime.FindStaging(
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format) * 2, true); surface->width * surface->height * 4, true);
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr);
if (!source_ptr) [[unlikely]] { if (!source_ptr) [[unlikely]] {
return; return;
} }
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
const u32 start_offset = load_start - surface->addr;
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
if (surface->is_tiled) { if (surface->is_tiled) {
std::vector<std::byte> unswizzled_data(staging.size); std::vector<std::byte> unswizzled_data(staging.size);
UnswizzleTexture(*surface, start_offset, upload_data, unswizzled_data); UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr,
upload_data, unswizzled_data);
runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped); runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
} else { } else {
runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped); runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
@ -931,7 +931,7 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
ASSERT(flush_start >= surface->addr && flush_end <= surface->end); ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
const auto& staging = runtime.FindStaging( const auto& staging = runtime.FindStaging(
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); surface->width * surface->height * 4, false);
const SurfaceParams params = surface->FromInterval(interval); const SurfaceParams params = surface->FromInterval(interval);
const BufferTextureCopy download = { const BufferTextureCopy download = {
.buffer_offset = 0, .buffer_offset = 0,
@ -948,13 +948,13 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
} }
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
const u32 start_offset = flush_start - surface->addr;
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
if (surface->is_tiled) { if (surface->is_tiled) {
std::vector<std::byte> swizzled_data(staging.size); std::vector<std::byte> swizzled_data(staging.size);
SwizzleTexture(*surface, start_offset, staging.mapped, swizzled_data); SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr,
staging.mapped, swizzled_data);
runtime.FormatConvert(surface->pixel_format, false, swizzled_data, download_dest); runtime.FormatConvert(surface->pixel_format, false, swizzled_data, download_dest);
} else { } else {
runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest); runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);

View File

@ -3,30 +3,26 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <glad/glad.h>
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/texture/texture_decode.h" #include "video_core/texture/texture_decode.h"
#include "video_core/rasterizer_cache/morton_swizzle.h" #include "video_core/rasterizer_cache/morton_swizzle.h"
#include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h" #include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/video_core.h"
namespace VideoCore { namespace VideoCore {
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) { std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
const u32 func_index = static_cast<u32>(params.pixel_format); const u32 func_index = static_cast<u32>(params.pixel_format);
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index]; const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled); SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled);
} }
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) { std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
const u32 func_index = static_cast<u32>(params.pixel_format); const u32 func_index = static_cast<u32>(params.pixel_format);
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled); UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled);
} }
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) { ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {
@ -68,4 +64,4 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
return result; return result;
} }
} // namespace OpenGL } // namespace VideoCore

View File

@ -44,7 +44,7 @@ class SurfaceParams;
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled); std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
/** /**
@ -55,7 +55,7 @@ void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
* @param source_tiled The source morton swizzled data. * @param source_tiled The source morton swizzled data.
* @param dest_linear The output buffer where the generated linear data will be written to. * @param dest_linear The output buffer where the generated linear data will be written to.
*/ */
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear); std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
} // namespace VideoCore } // namespace VideoCore

View File

@ -215,6 +215,8 @@ void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
return Pica::Texture::ConvertABGRToRGBA(source, dest); return Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && upload) { } else if (format == VideoCore::PixelFormat::RGB8 && upload) {
return Pica::Texture::ConvertBGRToRGBA(source, dest); return Pica::Texture::ConvertBGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::D24S8 && !upload) {
return; // HACK: Skip depth download
} else if (instance.IsFormatSupported(ToVkFormat(format), feature)) { } else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
std::memcpy(dest.data(), source.data(), source.size()); std::memcpy(dest.data(), source.data(), source.size());
} else { } else {
@ -624,6 +626,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
copy_regions[region_count++] = copy_region; copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
return; // HACK: Skip depth + stencil downloads for now
copy_region.bufferOffset += staging.mapped.size(); copy_region.bufferOffset += staging.mapped.size();
copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil; copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region; copy_regions[region_count++] = copy_region;