custom_tex_manager: Multithread custom texture loading and decode

* Each texture has an atomic flag to signal to the backend when decoding is finished

* Don't keep the raw file data in memory alongside the decoded data, to conserve RAM.
This commit is contained in:
GPUCode
2023-02-28 23:35:10 +02:00
parent 8396ce0b47
commit 19617f32c8
10 changed files with 194 additions and 88 deletions

View File

@@ -72,11 +72,11 @@ bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data) {
return true; return true;
} }
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height, bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
ddsktx_format& format) { ddsktx_format& format) {
ddsktx_texture_info tc{}; ddsktx_texture_info tc{};
const int size = static_cast<int>(in_data.size()); const int size = static_cast<int>(dds_data.size());
if (!ddsktx_parse(&tc, in_data.data(), size, nullptr)) { if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
return false; return false;
} }
@@ -85,9 +85,23 @@ bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& wi
format = tc.format; format = tc.format;
ddsktx_sub_data sub_data{}; ddsktx_sub_data sub_data{};
ddsktx_get_sub(&tc, &sub_data, in_data.data(), size, 0, 0, 0); ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
decoded_size = sub_data.size_bytes;
out_data.resize(sub_data.size_bytes); return true;
}
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data) {
ddsktx_texture_info tc{};
const int size = static_cast<int>(dds_data.size());
if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
return false;
}
ddsktx_sub_data sub_data{};
ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
ASSERT(out_data.size() == sub_data.size_bytes);
std::memcpy(out_data.data(), sub_data.buff, sub_data.size_bytes); std::memcpy(out_data.data(), sub_data.buff, sub_data.size_bytes);
return true; return true;

View File

@@ -13,9 +13,11 @@ bool ParsePNG(std::span<const u8> png_data, size_t& decoded_size, u32& width, u3
bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data); bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data);
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height, bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
ddsktx_format& format); ddsktx_format& format);
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data);
bool EncodePNG(const std::string& out_path, std::span<u8> in_data, u32 width, u32 height, bool EncodePNG(const std::string& out_path, std::span<u8> in_data, u32 width, u32 height,
s32 level = 6); s32 level = 6);

View File

@@ -99,6 +99,10 @@ public:
}); });
} }
const std::size_t NumWorkers() const noexcept {
return threads.size();
}
private: private:
std::queue<Task> requests; std::queue<Task> requests;
std::mutex queue_mutex; std::mutex queue_mutex;

View File

@@ -55,9 +55,7 @@ CustomPixelFormat ToCustomPixelFormat(ddsktx_format format) {
} // Anonymous namespace } // Anonymous namespace
CustomTexManager::CustomTexManager(Core::System& system_) CustomTexManager::CustomTexManager(Core::System& system_) : system{system_} {}
: system{system_}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1,
"Hires processing"} {}
CustomTexManager::~CustomTexManager() = default; CustomTexManager::~CustomTexManager() = default;
@@ -66,58 +64,91 @@ void CustomTexManager::FindCustomTextures() {
return; return;
} }
// If custom textures aren't enabled we don't want to create the thread pool,
// so don't do it in the constructor; do it here instead.
workers = std::make_unique<Common::ThreadWorker>(
std::max(std::thread::hardware_concurrency(), 2U) - 1, "Custom textures");
// Custom textures are currently stored as // Custom textures are currently stored as
// [TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png // [TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png
using namespace FileUtil;
const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id; const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id;
const std::string load_path = const std::string load_path =
fmt::format("{}textures/{:016X}/", GetUserPath(UserPath::LoadDir), program_id); fmt::format("{}textures/{:016X}/", GetUserPath(FileUtil::UserPath::LoadDir), program_id);
// Create the directory if it did not exist // Create the directory if it did not exist
if (!Exists(load_path)) { if (!FileUtil::Exists(load_path)) {
CreateFullPath(load_path); FileUtil::CreateFullPath(load_path);
} }
FSTEntry texture_dir; FileUtil::FSTEntry texture_dir;
std::vector<FSTEntry> textures; std::vector<FileUtil::FSTEntry> textures;
// 64 nested folders should be plenty for most cases // 64 nested folders should be plenty for most cases
ScanDirectoryTree(load_path, texture_dir, 64); FileUtil::ScanDirectoryTree(load_path, texture_dir, 64);
GetAllFilesFromNestedEntries(texture_dir, textures); FileUtil::GetAllFilesFromNestedEntries(texture_dir, textures);
u32 width{}; // Reserve space for all the textures in the folder
u32 height{}; const size_t num_textures = textures.size();
u32 format{}; custom_textures.resize(num_textures);
unsigned long long hash{};
std::string ext(3, ' ');
for (const FSTEntry& file : textures) { const auto load = [&](u32 begin, u32 end) {
const std::string& path = file.physicalName; u32 width{};
if (file.isDirectory || !file.virtualName.starts_with("tex1_")) { u32 height{};
u32 format{};
unsigned long long hash{};
std::string ext(3, ' ');
for (u32 i = begin; i < end; i++) {
const auto& file = textures[i];
const std::string& path = file.physicalName;
if (file.isDirectory || !file.virtualName.starts_with("tex1_")) {
continue;
}
// Parse the texture filename. We only really care about the hash,
// the rest should be queried from the file itself.
if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%s", &width, &height,
&hash, &format, ext.data()) != 5) {
continue;
}
custom_textures[i] = std::make_unique<CustomTexture>();
CustomTexture& texture = *custom_textures[i];
// Fill in relevant information
texture.file_format = MakeFileFormat(ext);
texture.hash = hash;
texture.path = path;
// Query the file for the rest
QueryTexture(texture);
}
};
const std::size_t num_workers{workers->NumWorkers()};
const std::size_t bucket_size{num_textures / num_workers};
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? num_textures : start + bucket_size};
workers->QueueWork([start, end, &load]() { load(start, end); });
}
workers->WaitForRequests();
// Assign each texture to the hash map
for (const auto& texture : custom_textures) {
if (!texture) {
continue; continue;
} }
const unsigned long long hash = texture->hash;
// Parse the texture filename. We only really care about the hash, auto [it, new_texture] = custom_texture_map.try_emplace(hash);
// the rest should be queried from the file itself.
if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%s", &width, &height, &hash,
&format, ext.data()) != 5) {
continue;
}
auto [it, new_texture] = custom_textures.try_emplace(hash);
if (!new_texture) { if (!new_texture) {
LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!", custom_textures[hash].path, LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!",
path); custom_texture_map[hash]->path, texture->path);
continue; continue;
} }
it->second = texture.get();
auto& texture = it->second;
texture.file_format = MakeFileFormat(ext);
texture.path = path;
// Query the required information from the file and load it.
// Since this doesn't involve any decoding it shouldn't consume too much RAM.
LoadTexture(texture);
} }
textures_loaded = true; textures_loaded = true;
@@ -134,7 +165,6 @@ u64 CustomTexManager::ComputeHash(const SurfaceParams& params, std::span<u8> dat
// this must be done... // this must be done...
const auto decoded = std::span{temp_buffer.data(), decoded_size}; const auto decoded = std::span{temp_buffer.data(), decoded_size};
DecodeTexture(params, params.addr, params.end, data, decoded); DecodeTexture(params, params.addr, params.end, data, decoded);
return ComputeHash64(decoded.data(), decoded_size); return ComputeHash64(decoded.data(), decoded_size);
} }
@@ -185,65 +215,98 @@ void CustomTexManager::DumpTexture(const SurfaceParams& params, u32 level, std::
EncodePNG(dump_path, decoded, width, height); EncodePNG(dump_path, decoded, width, height);
}; };
workers.QueueWork(std::move(dump)); workers->QueueWork(std::move(dump));
dumped_textures.insert(data_hash); dumped_textures.insert(data_hash);
} }
const Texture& CustomTexManager::GetTexture(u64 data_hash) { CustomTexture& CustomTexManager::GetTexture(u64 data_hash) {
auto it = custom_textures.find(data_hash); auto it = custom_texture_map.find(data_hash);
if (it == custom_textures.end()) { if (it == custom_texture_map.end()) {
LOG_WARNING(Render, "Unable to find replacement for surface with hash {:016X}", data_hash); LOG_WARNING(Render, "Unable to find replacement for surface with hash {:016X}", data_hash);
return dummy_texture; return dummy_texture;
} }
LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", it->second.path, data_hash); CustomTexture& texture = *it->second;
return it->second; LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", texture.path, data_hash);
return texture;
} }
void CustomTexManager::DecodeToStaging(const Texture& texture, const StagingData& staging) { void CustomTexManager::DecodeToStaging(CustomTexture& texture, StagingData& staging) {
switch (texture.file_format) { if (texture.decoded) {
case CustomFileFormat::PNG: // Nothing to do here, just copy over the data
if (!DecodePNG(texture.data, staging.mapped)) { ASSERT_MSG(staging.size == texture.staging_size,
LOG_ERROR(Render, "Failed to decode png {}", texture.path); "Incorrect staging size for custom texture with hash {:016X}", texture.hash);
}
if (compatibility_mode) {
const u32 stride = texture.width * 4;
FlipTexture(staging.mapped, texture.width, texture.height, stride);
}
break;
case CustomFileFormat::DDS:
case CustomFileFormat::KTX:
// Compressed formats don't need CPU decoding
std::memcpy(staging.mapped.data(), texture.data.data(), texture.data.size()); std::memcpy(staging.mapped.data(), texture.data.data(), texture.data.size());
break; return;
} }
// Set an atomic flag in staging data so the backend can wait until the data is finished
staging.flag = &texture.flag;
const auto decode = [this, &texture, mapped = staging.mapped]() {
// Read the file; this is potentially the most expensive step
FileUtil::IOFile file{texture.path, "rb"};
ScratchBuffer<u8> file_data{file.GetSize()};
file.ReadBytes(file_data.Data(), file.GetSize());
// Resize the decoded data buffer
std::vector<u8>& decoded_data = texture.data;
decoded_data.resize(texture.staging_size);
// Decode
switch (texture.file_format) {
case CustomFileFormat::PNG:
if (!DecodePNG(file_data.Span(), decoded_data)) {
LOG_ERROR(Render, "Failed to decode png {}", texture.path);
}
if (compatibility_mode) {
const u32 stride = texture.width * 4;
FlipTexture(decoded_data, texture.width, texture.height, stride);
}
break;
case CustomFileFormat::DDS:
case CustomFileFormat::KTX:
// Compressed formats don't need CPU decoding and must be pre-flipped
LoadDDSKTX(file_data.Span(), decoded_data);
break;
}
// Copy it over to the staging memory
texture.decoded = true;
std::memcpy(mapped.data(), decoded_data.data(), decoded_data.size());
// Notify the backend that decode is done
texture.flag.test_and_set();
texture.flag.notify_all();
};
workers->QueueWork(std::move(decode));
} }
void CustomTexManager::LoadTexture(Texture& texture) { void CustomTexManager::QueryTexture(CustomTexture& texture) {
std::vector<u8>& data = texture.data;
// Read the file // Read the file
auto file = FileUtil::IOFile(texture.path, "rb"); FileUtil::IOFile file{texture.path, "rb"};
data.resize(file.GetSize()); ScratchBuffer<u8> data{file.GetSize()};
file.ReadBytes(data.data(), file.GetSize()); file.ReadBytes(data.Data(), file.GetSize());
// Parse it based on the file extension // Parse it based on the file extension
switch (texture.file_format) { switch (texture.file_format) {
case CustomFileFormat::PNG: case CustomFileFormat::PNG:
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too? if (!ParsePNG(data.Span(), texture.staging_size, texture.width, texture.height)) {
if (!ParsePNG(data, texture.staging_size, texture.width, texture.height)) {
LOG_ERROR(Render, "Failed to parse png file {}", texture.path); LOG_ERROR(Render, "Failed to parse png file {}", texture.path);
return; return;
} }
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too?
break; break;
case CustomFileFormat::DDS: case CustomFileFormat::DDS:
case CustomFileFormat::KTX: case CustomFileFormat::KTX:
ddsktx_format format{}; ddsktx_format format{};
if (!ParseDDSKTX(data, texture.data, texture.width, texture.height, format)) { if (!ParseDDSKTX(data.Span(), texture.staging_size, texture.width, texture.height,
format)) {
LOG_ERROR(Render, "Failed to parse dds/ktx file {}", texture.path); LOG_ERROR(Render, "Failed to parse dds/ktx file {}", texture.path);
return; return;
} }
texture.staging_size = texture.data.size();
texture.format = ToCustomPixelFormat(format); texture.format = ToCustomPixelFormat(format);
break; break;
} }

View File

@@ -4,6 +4,7 @@
#pragma once #pragma once
#include <atomic>
#include <span> #include <span>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@@ -27,17 +28,20 @@ enum class CustomFileFormat : u32 {
KTX = 2, KTX = 2,
}; };
struct Texture { struct CustomTexture {
u32 width; u32 width;
u32 height; u32 height;
unsigned long long hash{};
CustomPixelFormat format; CustomPixelFormat format;
CustomFileFormat file_format; CustomFileFormat file_format;
std::string path; std::string path;
std::size_t staging_size; std::size_t staging_size;
std::vector<u8> data; std::vector<u8> data;
std::atomic_flag flag;
bool decoded = false;
operator bool() const noexcept { operator bool() const noexcept {
return !data.empty(); return hash != 0;
} }
}; };
@@ -56,10 +60,10 @@ public:
void DumpTexture(const SurfaceParams& params, u32 level, std::span<u8> data); void DumpTexture(const SurfaceParams& params, u32 level, std::span<u8> data);
/// Returns the custom texture handle assigned to the provided data hash /// Returns the custom texture handle assigned to the provided data hash
const Texture& GetTexture(u64 data_hash); CustomTexture& GetTexture(u64 data_hash);
/// Decodes the data in texture to a consumable format /// Decodes the data in texture to a consumable format
void DecodeToStaging(const Texture& texture, const StagingData& staging); void DecodeToStaging(CustomTexture& texture, StagingData& staging);
bool CompatibilityMode() const noexcept { bool CompatibilityMode() const noexcept {
return compatibility_mode; return compatibility_mode;
@@ -67,15 +71,16 @@ public:
private: private:
/// Fills the texture structure with information from the file in path /// Fills the texture structure with information from the file in path
void LoadTexture(Texture& texture); void QueryTexture(CustomTexture& texture);
private: private:
Core::System& system; Core::System& system;
Common::ThreadWorker workers; std::unique_ptr<Common::ThreadWorker> workers;
std::unordered_set<u64> dumped_textures; std::unordered_set<u64> dumped_textures;
std::unordered_map<u64, Texture> custom_textures; std::unordered_map<u64, CustomTexture*> custom_texture_map;
std::vector<std::unique_ptr<CustomTexture>> custom_textures;
std::vector<u8> temp_buffer; std::vector<u8> temp_buffer;
Texture dummy_texture{}; CustomTexture dummy_texture{};
bool textures_loaded{}; bool textures_loaded{};
bool compatibility_mode{true}; bool compatibility_mode{true};
}; };

View File

@@ -966,7 +966,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
const u32 level = surface.LevelOf(load_info.addr); const u32 level = surface.LevelOf(load_info.addr);
const bool is_base_level = level == 0; const bool is_base_level = level == 0;
const u64 hash = custom_tex_manager.ComputeHash(load_info, upload_data); const u64 hash = custom_tex_manager.ComputeHash(load_info, upload_data);
const Texture& texture = custom_tex_manager.GetTexture(hash); CustomTexture& texture = custom_tex_manager.GetTexture(hash);
// The old texture pack system did not support mipmaps so older packs might do // The old texture pack system did not support mipmaps so older packs might do
// wonky things. For example many packs have mipmaps larger than the base // wonky things. For example many packs have mipmaps larger than the base
@@ -995,7 +995,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
// Copy and decode the custom texture to the staging buffer // Copy and decode the custom texture to the staging buffer
const u32 custom_size = static_cast<u32>(texture.staging_size); const u32 custom_size = static_cast<u32>(texture.staging_size);
const StagingData staging = runtime.FindStaging(custom_size, true); StagingData staging = runtime.FindStaging(custom_size, true);
custom_tex_manager.DecodeToStaging(texture, staging); custom_tex_manager.DecodeToStaging(texture, staging);
// Upload surface // Upload surface

View File

@@ -4,6 +4,7 @@
#pragma once #pragma once
#include <atomic>
#include <span> #include <span>
#include <boost/icl/right_open_interval.hpp> #include <boost/icl/right_open_interval.hpp>
#include "common/hash.h" #include "common/hash.h"
@@ -84,6 +85,14 @@ struct StagingData {
u32 size = 0; u32 size = 0;
std::span<u8> mapped{}; std::span<u8> mapped{};
u64 buffer_offset = 0; u64 buffer_offset = 0;
const std::atomic_flag* flag{};
/// Blocks the caller until the attached flag is set.
/// Does nothing when no flag has been attached to this staging data.
void Wait() const noexcept {
    if (flag != nullptr) {
        // wait(false) returns once the flag's value is no longer `false`
        flag->wait(false);
    }
}
}; };
struct TextureCubeConfig { struct TextureCubeConfig {

View File

@@ -378,6 +378,10 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Handle()); glBindTexture(GL_TEXTURE_2D, Handle());
// Wait for the buffer if a decode is pending, this isn't very optimal
// but this kind of threading is very hard in gl
staging.Wait();
const auto& tuple = alloc.tuple; const auto& tuple = alloc.tuple;
if (is_custom && custom_format != VideoCore::CustomPixelFormat::RGBA8) { if (is_custom && custom_format != VideoCore::CustomPixelFormat::RGBA8) {
glCompressedTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom, glCompressedTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom,

View File

@@ -9,7 +9,7 @@
// Include vulkan-hpp header // Include vulkan-hpp header
#define VK_ENABLE_BETA_EXTENSIONS #define VK_ENABLE_BETA_EXTENSIONS
#define VK_NO_PROTOTYPES 1 #define VK_NO_PROTOTYPES
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_CONSTRUCTORS #define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_NO_STRUCT_SETTERS #define VULKAN_HPP_NO_STRUCT_SETTERS

View File

@@ -907,6 +907,11 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
// Wait for a decode to finish if one is pending. Normally this isn't
// needed until we actually submit the command buffer but it's safer to do it now
// to prevent the stream buffer from reclaiming our space before we are done with it.
staging.Wait();
}); });
runtime->upload_buffer.Commit(staging.size); runtime->upload_buffer.Commit(staging.size);