texture_cache: Use pre-allocated buffer for texture uploads
This commit is contained in:
		| @@ -39,6 +39,12 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||||||
|     sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); |     sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||||||
|     sampler_descriptor.cubemap_anisotropy.Assign(1); |     sampler_descriptor.cubemap_anisotropy.Assign(1); | ||||||
|  |  | ||||||
|  |     // These values were chosen based on typical peak swizzle data sizes seen in some titles | ||||||
|  |     static constexpr size_t SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 8_MiB; | ||||||
|  |     static constexpr size_t UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 1_MiB; | ||||||
|  |     swizzle_data_buffer.reserve(SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); | ||||||
|  |     unswizzle_data_buffer.reserve(UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); | ||||||
|  |  | ||||||
|     // Make sure the first index is reserved for the null resources |     // Make sure the first index is reserved for the null resources | ||||||
|     // This way the null resource becomes a compile time constant |     // This way the null resource becomes a compile time constant | ||||||
|     void(slot_images.insert(NullImageParams{})); |     void(slot_images.insert(NullImageParams{})); | ||||||
| @@ -734,13 +740,21 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||||||
|         gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |         gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||||||
|         const auto uploads = FullUploadSwizzles(image.info); |         const auto uploads = FullUploadSwizzles(image.info); | ||||||
|         runtime.AccelerateImageUpload(image, staging, uploads); |         runtime.AccelerateImageUpload(image, staging, uploads); | ||||||
|     } else if (True(image.flags & ImageFlagBits::Converted)) { |         return; | ||||||
|         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); |     } | ||||||
|         auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); |     const size_t guest_size_bytes = image.guest_size_bytes; | ||||||
|         ConvertImage(unswizzled_data, image.info, mapped_span, copies); |     swizzle_data_buffer.resize(guest_size_bytes); | ||||||
|  |     gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | ||||||
|  |  | ||||||
|  |     if (True(image.flags & ImageFlagBits::Converted)) { | ||||||
|  |         unswizzle_data_buffer.resize(image.unswizzled_size_bytes); | ||||||
|  |         auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, | ||||||
|  |                                      unswizzle_data_buffer); | ||||||
|  |         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); | ||||||
|         image.UploadMemory(staging, copies); |         image.UploadMemory(staging, copies); | ||||||
|     } else { |     } else { | ||||||
|         const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); |         const auto copies = | ||||||
|  |             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); | ||||||
|         image.UploadMemory(staging, copies); |         image.UploadMemory(staging, copies); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -910,7 +924,7 @@ void TextureCache<P>::InvalidateScale(Image& image) { | |||||||
| } | } | ||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
| u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { | u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) { | ||||||
|     const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * |     const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * | ||||||
|                                           Settings::values.resolution_info.up_scale); |                                           Settings::values.resolution_info.up_scale); | ||||||
|     const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + |     const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + | ||||||
|   | |||||||
| @@ -368,7 +368,7 @@ private: | |||||||
|     void InvalidateScale(Image& image); |     void InvalidateScale(Image& image); | ||||||
|     bool ScaleUp(Image& image); |     bool ScaleUp(Image& image); | ||||||
|     bool ScaleDown(Image& image); |     bool ScaleDown(Image& image); | ||||||
|     u64 GetScaledImageSizeBytes(ImageBase& image); |     u64 GetScaledImageSizeBytes(const ImageBase& image); | ||||||
|  |  | ||||||
|     Runtime& runtime; |     Runtime& runtime; | ||||||
|  |  | ||||||
| @@ -417,6 +417,9 @@ private: | |||||||
|  |  | ||||||
|     std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; |     std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | ||||||
|  |  | ||||||
|  |     std::vector<u8> swizzle_data_buffer; | ||||||
|  |     std::vector<u8> unswizzle_data_buffer; | ||||||
|  |  | ||||||
|     u64 modification_tick = 0; |     u64 modification_tick = 0; | ||||||
|     u64 frame_tick = 0; |     u64 frame_tick = 0; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -765,8 +765,9 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||||||
| } | } | ||||||
|  |  | ||||||
| std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||||||
|                                             const ImageInfo& info, std::span<u8> output) { |                                             const ImageInfo& info, std::span<const u8> input, | ||||||
|     const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); |                                             std::span<u8> output) { | ||||||
|  |     const size_t guest_size_bytes = input.size_bytes(); | ||||||
|     const u32 bpp_log2 = BytesPerBlockLog2(info.format); |     const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||||||
|     const Extent3D size = info.size; |     const Extent3D size = info.size; | ||||||
|  |  | ||||||
| @@ -789,10 +790,6 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||||||
|             .image_extent = size, |             .image_extent = size, | ||||||
|         }}; |         }}; | ||||||
|     } |     } | ||||||
|     const auto input_data = std::make_unique<u8[]>(guest_size_bytes); |  | ||||||
|     gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); |  | ||||||
|     const std::span<const u8> input(input_data.get(), guest_size_bytes); |  | ||||||
|  |  | ||||||
|     const LevelInfo level_info = MakeLevelInfo(info); |     const LevelInfo level_info = MakeLevelInfo(info); | ||||||
|     const s32 num_layers = info.resources.layers; |     const s32 num_layers = info.resources.layers; | ||||||
|     const s32 num_levels = info.resources.levels; |     const s32 num_levels = info.resources.levels; | ||||||
|   | |||||||
| @@ -59,6 +59,7 @@ struct OverlapResult { | |||||||
|  |  | ||||||
| [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | ||||||
|                                                           GPUVAddr gpu_addr, const ImageInfo& info, |                                                           GPUVAddr gpu_addr, const ImageInfo& info, | ||||||
|  |                                                           std::span<const u8> input, | ||||||
|                                                           std::span<u8> output); |                                                           std::span<u8> output); | ||||||
|  |  | ||||||
| [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user