Texture Cache: fix memory management and optimize scaled downloads and uploads.

This commit is contained in:
Fernando Sahmkow 2021-10-18 22:56:36 +02:00
parent c2ca55c9d5
commit 3b61de74e6
7 changed files with 57 additions and 28 deletions

View File

@ -697,7 +697,7 @@ void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) { std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) { if (is_rescaled) {
ScaleDown(); ScaleDown(true);
} }
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
@ -725,6 +725,10 @@ void Image::UploadMemory(const ImageBufferMap& map,
void Image::DownloadMemory(ImageBufferMap& map, void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) { std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer);
glPixelStorei(GL_PACK_ALIGNMENT, 1); glPixelStorei(GL_PACK_ALIGNMENT, 1);
@ -743,6 +747,9 @@ void Image::DownloadMemory(ImageBufferMap& map,
} }
CopyImageToBuffer(copy, map.offset); CopyImageToBuffer(copy, map.offset);
} }
if (is_rescaled) {
ScaleUp(true);
}
} }
GLuint Image::StorageHandle() noexcept { GLuint Image::StorageHandle() noexcept {
@ -979,7 +986,7 @@ bool Image::Scale(bool up_scale) {
return true; return true;
} }
bool Image::ScaleUp() { bool Image::ScaleUp(bool ignore) {
if (True(flags & ImageFlagBits::Rescaled)) { if (True(flags & ImageFlagBits::Rescaled)) {
return false; return false;
} }
@ -997,7 +1004,11 @@ bool Image::ScaleUp() {
flags &= ~ImageFlagBits::Rescaled; flags &= ~ImageFlagBits::Rescaled;
return false; return false;
} }
scale_count++; has_scaled = true;
if (ignore) {
current_texture = upscaled_backup.handle;
return true;
}
if (!Scale()) { if (!Scale()) {
flags &= ~ImageFlagBits::Rescaled; flags &= ~ImageFlagBits::Rescaled;
return false; return false;
@ -1005,7 +1016,7 @@ bool Image::ScaleUp() {
return true; return true;
} }
bool Image::ScaleDown() { bool Image::ScaleDown(bool ignore) {
if (False(flags & ImageFlagBits::Rescaled)) { if (False(flags & ImageFlagBits::Rescaled)) {
return false; return false;
} }
@ -1013,7 +1024,10 @@ bool Image::ScaleDown() {
if (!runtime->resolution.active) { if (!runtime->resolution.active) {
return false; return false;
} }
scale_count++; if (ignore) {
current_texture = texture.handle;
return true;
}
if (!Scale(false)) { if (!Scale(false)) {
flags &= ~ImageFlagBits::Rescaled; flags &= ~ImageFlagBits::Rescaled;
return false; return false;

View File

@ -196,9 +196,9 @@ public:
return gl_type; return gl_type;
} }
bool ScaleUp(); bool ScaleUp(bool ignore = false);
bool ScaleDown(); bool ScaleDown(bool ignore = false);
private: private:
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);

View File

@ -1055,7 +1055,7 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
// TODO: Move this to another API // TODO: Move this to another API
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) { if (is_rescaled) {
ScaleDown(); ScaleDown(true);
} }
scheduler->RequestOutsideRenderPassOperationContext(); scheduler->RequestOutsideRenderPassOperationContext();
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
@ -1073,6 +1073,10 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
} }
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
scheduler->RequestOutsideRenderPassOperationContext(); scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask, scheduler->Record([buffer = map.buffer, image = *original_image, aspect_mask = aspect_mask,
@ -1125,9 +1129,12 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier); 0, memory_write_barrier, nullptr, image_write_barrier);
}); });
if (is_rescaled) {
ScaleUp(true);
}
} }
bool Image::ScaleUp() { bool Image::ScaleUp(bool ignore) {
if (True(flags & ImageFlagBits::Rescaled)) { if (True(flags & ImageFlagBits::Rescaled)) {
return false; return false;
} }
@ -1137,7 +1144,7 @@ bool Image::ScaleUp() {
if (!resolution.active) { if (!resolution.active) {
return false; return false;
} }
scale_count++; has_scaled = true;
const auto& device = runtime->device; const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D; const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_width = resolution.ScaleUp(info.size.width);
@ -1149,8 +1156,12 @@ bool Image::ScaleUp() {
scaled_image = MakeImage(device, scaled_info); scaled_image = MakeImage(device, scaled_info);
auto& allocator = runtime->memory_allocator; auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
ignore = false;
} }
current_image = *scaled_image; current_image = *scaled_image;
if (ignore) {
return true;
}
if (aspect_mask == 0) { if (aspect_mask == 0) {
aspect_mask = ImageAspectMask(info.format); aspect_mask = ImageAspectMask(info.format);
@ -1212,7 +1223,7 @@ bool Image::ScaleUp() {
return true; return true;
} }
bool Image::ScaleDown() { bool Image::ScaleDown(bool ignore) {
if (False(flags & ImageFlagBits::Rescaled)) { if (False(flags & ImageFlagBits::Rescaled)) {
return false; return false;
} }
@ -1221,6 +1232,10 @@ bool Image::ScaleDown() {
if (!resolution.active) { if (!resolution.active) {
return false; return false;
} }
if (ignore) {
current_image = *original_image;
return true;
}
const auto& device = runtime->device; const auto& device = runtime->device;
const bool is_2d = info.type == ImageType::e2D; const bool is_2d = info.type == ImageType::e2D;
const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_width = resolution.ScaleUp(info.size.width);

View File

@ -129,9 +129,9 @@ public:
return std::exchange(initialized, true); return std::exchange(initialized, true);
} }
bool ScaleUp(); bool ScaleUp(bool ignore = false);
bool ScaleDown(); bool ScaleDown(bool ignore = false);
private: private:
VKScheduler* scheduler{}; VKScheduler* scheduler{};

View File

@ -61,7 +61,7 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
: info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{}, converted_size_bytes{CalculateConvertedSizeBytes(info)}, scale_rating{}, scale_tick{},
scale_count{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, has_scaled{}, gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_},
cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} { cpu_addr_end{cpu_addr + guest_size_bytes}, mip_level_offsets{CalculateMipLevelOffsets(info)} {
if (info.type == ImageType::e3D) { if (info.type == ImageType::e3D) {
slice_offsets = CalculateSliceOffsets(info); slice_offsets = CalculateSliceOffsets(info);

View File

@ -77,8 +77,8 @@ struct ImageBase {
void CheckBadOverlapState(); void CheckBadOverlapState();
void CheckAliasState(); void CheckAliasState();
bool HasScaled() { bool HasScaled() const {
return scale_count > 0; return has_scaled;
} }
ImageInfo info; ImageInfo info;
@ -88,7 +88,7 @@ struct ImageBase {
u32 converted_size_bytes = 0; u32 converted_size_bytes = 0;
u32 scale_rating = 0; u32 scale_rating = 0;
u64 scale_tick = 0; u64 scale_tick = 0;
u32 scale_count = 0; bool has_scaled = false;
ImageFlagBits flags = ImageFlagBits::CpuModified; ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0; GPUVAddr gpu_addr = 0;

View File

@ -60,7 +60,7 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
// On OpenGL we can be more conservatives as the driver takes care. // On OpenGL we can be more conservatives as the driver takes care.
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
minimum_memory = expected_memory; minimum_memory = 0;
} }
} }
@ -1464,16 +1464,6 @@ template <class P>
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked)); ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked; image.flags |= ImageFlagBits::Tracked;
if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
if (False(image.flags & ImageFlagBits::Sparse)) { if (False(image.flags & ImageFlagBits::Sparse)) {
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
return; return;
@ -1519,6 +1509,16 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
template <class P> template <class P>
void TextureCache<P>::DeleteImage(ImageId image_id) { void TextureCache<P>::DeleteImage(ImageId image_id) {
ImageBase& image = slot_images[image_id]; ImageBase& image = slot_images[image_id];
if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr; const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr); const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) { if (alloc_it == image_allocs_table.end()) {