custom_tex_manager: Multithread custom texture loading and decode

* Each texture has an atomic flag to signal to the backend when decoding is finished

* Don't store the file data as well to conserve RAM.
This commit is contained in:
GPUCode
2023-02-28 23:35:10 +02:00
parent 8396ce0b47
commit 19617f32c8
10 changed files with 194 additions and 88 deletions

View File

@@ -72,11 +72,11 @@ bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data) {
return true;
}
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height,
bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
ddsktx_format& format) {
ddsktx_texture_info tc{};
const int size = static_cast<int>(in_data.size());
if (!ddsktx_parse(&tc, in_data.data(), size, nullptr)) {
const int size = static_cast<int>(dds_data.size());
if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
return false;
}
@@ -85,9 +85,23 @@ bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& wi
format = tc.format;
ddsktx_sub_data sub_data{};
ddsktx_get_sub(&tc, &sub_data, in_data.data(), size, 0, 0, 0);
ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
decoded_size = sub_data.size_bytes;
out_data.resize(sub_data.size_bytes);
return true;
}
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data) {
ddsktx_texture_info tc{};
const int size = static_cast<int>(dds_data.size());
if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
return false;
}
ddsktx_sub_data sub_data{};
ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
ASSERT(out_data.size() == sub_data.size_bytes);
std::memcpy(out_data.data(), sub_data.buff, sub_data.size_bytes);
return true;

View File

@@ -13,9 +13,11 @@ bool ParsePNG(std::span<const u8> png_data, size_t& decoded_size, u32& width, u3
bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data);
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height,
bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
ddsktx_format& format);
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data);
bool EncodePNG(const std::string& out_path, std::span<u8> in_data, u32 width, u32 height,
s32 level = 6);

View File

@@ -99,6 +99,10 @@ public:
});
}
const std::size_t NumWorkers() const noexcept {
return threads.size();
}
private:
std::queue<Task> requests;
std::mutex queue_mutex;

View File

@@ -55,9 +55,7 @@ CustomPixelFormat ToCustomPixelFormat(ddsktx_format format) {
} // Anonymous namespace
CustomTexManager::CustomTexManager(Core::System& system_)
: system{system_}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1,
"Hires processing"} {}
CustomTexManager::CustomTexManager(Core::System& system_) : system{system_} {}
CustomTexManager::~CustomTexManager() = default;
@@ -66,58 +64,91 @@ void CustomTexManager::FindCustomTextures() {
return;
}
// If custom textures aren't enabled we don't want to create the thread pool,
// so don't do it in the constructor; do it here instead.
workers = std::make_unique<Common::ThreadWorker>(
std::max(std::thread::hardware_concurrency(), 2U) - 1, "Custom textures");
// Custom textures are currently stored as
// [TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png
using namespace FileUtil;
const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id;
const std::string load_path =
fmt::format("{}textures/{:016X}/", GetUserPath(UserPath::LoadDir), program_id);
fmt::format("{}textures/{:016X}/", GetUserPath(FileUtil::UserPath::LoadDir), program_id);
// Create the directory if it did not exist
if (!Exists(load_path)) {
CreateFullPath(load_path);
if (!FileUtil::Exists(load_path)) {
FileUtil::CreateFullPath(load_path);
}
FSTEntry texture_dir;
std::vector<FSTEntry> textures;
FileUtil::FSTEntry texture_dir;
std::vector<FileUtil::FSTEntry> textures;
// 64 nested folders should be plenty for most cases
ScanDirectoryTree(load_path, texture_dir, 64);
GetAllFilesFromNestedEntries(texture_dir, textures);
FileUtil::ScanDirectoryTree(load_path, texture_dir, 64);
FileUtil::GetAllFilesFromNestedEntries(texture_dir, textures);
u32 width{};
u32 height{};
u32 format{};
unsigned long long hash{};
std::string ext(3, ' ');
// Reserve space for all the textures in the folder
const size_t num_textures = textures.size();
custom_textures.resize(num_textures);
for (const FSTEntry& file : textures) {
const std::string& path = file.physicalName;
if (file.isDirectory || !file.virtualName.starts_with("tex1_")) {
// Worker routine: inspects the entries textures[begin, end) and, for every file whose name
// matches the custom texture naming scheme, creates the CustomTexture stored in the slot of
// custom_textures with the same index. Slots of skipped entries are left null.
const auto load = [&](u32 begin, u32 end) {
    u32 width{};
    u32 height{};
    u32 format{};
    unsigned long long hash{};
    // Three characters of storage; std::string keeps a terminator slot after them.
    std::string ext(3, ' ');
    for (u32 i = begin; i < end; i++) {
        const auto& file = textures[i];
        const std::string& path = file.physicalName;
        if (file.isDirectory || !file.virtualName.starts_with("tex1_")) {
            continue;
        }

        // Parse the texture filename. We only really care about the hash,
        // the rest should be queried from the file itself.
        // The extension is read with an explicit width of 3 so that a longer extension
        // cannot write past the end of `ext` (an unbounded %s would overflow it).
        if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%3s", &width, &height,
                        &hash, &format, ext.data()) != 5) {
            continue;
        }

        custom_textures[i] = std::make_unique<CustomTexture>();
        CustomTexture& texture = *custom_textures[i];

        // Fill in relevant information
        texture.file_format = MakeFileFormat(ext);
        texture.hash = hash;
        texture.path = path;

        // Query the file for the rest
        QueryTexture(texture);
    }
};
const std::size_t num_workers{workers->NumWorkers()};
const std::size_t bucket_size{num_textures / num_workers};
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? num_textures : start + bucket_size};
workers->QueueWork([start, end, &load]() { load(start, end); });
}
workers->WaitForRequests();
// Assign each texture to the hash map
for (const auto& texture : custom_textures) {
if (!texture) {
continue;
}
// Parse the texture filename. We only really care about the hash,
// the rest should be queried from the file itself.
if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%s", &width, &height, &hash,
&format, ext.data()) != 5) {
continue;
}
auto [it, new_texture] = custom_textures.try_emplace(hash);
const unsigned long long hash = texture->hash;
auto [it, new_texture] = custom_texture_map.try_emplace(hash);
if (!new_texture) {
LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!", custom_textures[hash].path,
path);
LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!",
custom_texture_map[hash]->path, texture->path);
continue;
}
auto& texture = it->second;
texture.file_format = MakeFileFormat(ext);
texture.path = path;
// Query the required information from the file and load it.
// Since this doesn't involve any decoding it shouldn't consume too much RAM.
LoadTexture(texture);
it->second = texture.get();
}
textures_loaded = true;
@@ -134,7 +165,6 @@ u64 CustomTexManager::ComputeHash(const SurfaceParams& params, std::span<u8> dat
// this must be done...
const auto decoded = std::span{temp_buffer.data(), decoded_size};
DecodeTexture(params, params.addr, params.end, data, decoded);
return ComputeHash64(decoded.data(), decoded_size);
}
@@ -185,65 +215,98 @@ void CustomTexManager::DumpTexture(const SurfaceParams& params, u32 level, std::
EncodePNG(dump_path, decoded, width, height);
};
workers.QueueWork(std::move(dump));
workers->QueueWork(std::move(dump));
dumped_textures.insert(data_hash);
}
const Texture& CustomTexManager::GetTexture(u64 data_hash) {
auto it = custom_textures.find(data_hash);
if (it == custom_textures.end()) {
CustomTexture& CustomTexManager::GetTexture(u64 data_hash) {
auto it = custom_texture_map.find(data_hash);
if (it == custom_texture_map.end()) {
LOG_WARNING(Render, "Unable to find replacement for surface with hash {:016X}", data_hash);
return dummy_texture;
}
LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", it->second.path, data_hash);
return it->second;
CustomTexture& texture = *it->second;
LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", texture.path, data_hash);
return texture;
}
void CustomTexManager::DecodeToStaging(const Texture& texture, const StagingData& staging) {
switch (texture.file_format) {
case CustomFileFormat::PNG:
if (!DecodePNG(texture.data, staging.mapped)) {
LOG_ERROR(Render, "Failed to decode png {}", texture.path);
}
if (compatibility_mode) {
const u32 stride = texture.width * 4;
FlipTexture(staging.mapped, texture.width, texture.height, stride);
}
break;
case CustomFileFormat::DDS:
case CustomFileFormat::KTX:
// Compressed formats don't need CPU decoding
void CustomTexManager::DecodeToStaging(CustomTexture& texture, StagingData& staging) {
if (texture.decoded) {
// Nothing to do here, just copy over the data
ASSERT_MSG(staging.size == texture.staging_size,
"Incorrect staging size for custom texture with hash {:016X}", texture.hash);
std::memcpy(staging.mapped.data(), texture.data.data(), texture.data.size());
break;
return;
}
// Set an atomic flag in staging data so the backend can wait until the data is finished
staging.flag = &texture.flag;
const auto decode = [this, &texture, mapped = staging.mapped]() {
// Read the file; this is potentially the most expensive step
FileUtil::IOFile file{texture.path, "rb"};
ScratchBuffer<u8> file_data{file.GetSize()};
file.ReadBytes(file_data.Data(), file.GetSize());
// Resize the decoded data buffer
std::vector<u8>& decoded_data = texture.data;
decoded_data.resize(texture.staging_size);
// Decode
switch (texture.file_format) {
case CustomFileFormat::PNG:
if (!DecodePNG(file_data.Span(), decoded_data)) {
LOG_ERROR(Render, "Failed to decode png {}", texture.path);
}
if (compatibility_mode) {
const u32 stride = texture.width * 4;
FlipTexture(decoded_data, texture.width, texture.height, stride);
}
break;
case CustomFileFormat::DDS:
case CustomFileFormat::KTX:
// Compressed formats don't need CPU decoding and must be pre-flipped
LoadDDSKTX(file_data.Span(), decoded_data);
break;
}
// Copy it over to the staging memory
texture.decoded = true;
std::memcpy(mapped.data(), decoded_data.data(), decoded_data.size());
// Notify the backend that decode is done
texture.flag.test_and_set();
texture.flag.notify_all();
};
workers->QueueWork(std::move(decode));
}
void CustomTexManager::LoadTexture(Texture& texture) {
std::vector<u8>& data = texture.data;
void CustomTexManager::QueryTexture(CustomTexture& texture) {
// Read the file
auto file = FileUtil::IOFile(texture.path, "rb");
data.resize(file.GetSize());
file.ReadBytes(data.data(), file.GetSize());
FileUtil::IOFile file{texture.path, "rb"};
ScratchBuffer<u8> data{file.GetSize()};
file.ReadBytes(data.Data(), file.GetSize());
// Parse it based on the file extension
switch (texture.file_format) {
case CustomFileFormat::PNG:
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too?
if (!ParsePNG(data, texture.staging_size, texture.width, texture.height)) {
if (!ParsePNG(data.Span(), texture.staging_size, texture.width, texture.height)) {
LOG_ERROR(Render, "Failed to parse png file {}", texture.path);
return;
}
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too?
break;
case CustomFileFormat::DDS:
case CustomFileFormat::KTX:
ddsktx_format format{};
if (!ParseDDSKTX(data, texture.data, texture.width, texture.height, format)) {
if (!ParseDDSKTX(data.Span(), texture.staging_size, texture.width, texture.height,
format)) {
LOG_ERROR(Render, "Failed to parse dds/ktx file {}", texture.path);
return;
}
texture.staging_size = texture.data.size();
texture.format = ToCustomPixelFormat(format);
break;
}

View File

@@ -4,6 +4,7 @@
#pragma once
#include <atomic>
#include <span>
#include <string>
#include <unordered_map>
@@ -27,17 +28,20 @@ enum class CustomFileFormat : u32 {
KTX = 2,
};
struct Texture {
struct CustomTexture {
u32 width;
u32 height;
unsigned long long hash{};
CustomPixelFormat format;
CustomFileFormat file_format;
std::string path;
std::size_t staging_size;
std::vector<u8> data;
std::atomic_flag flag;
bool decoded = false;
operator bool() const noexcept {
return !data.empty();
return hash != 0;
}
};
@@ -56,10 +60,10 @@ public:
void DumpTexture(const SurfaceParams& params, u32 level, std::span<u8> data);
/// Returns the custom texture handle assigned to the provided data hash
const Texture& GetTexture(u64 data_hash);
CustomTexture& GetTexture(u64 data_hash);
/// Decodes the data in texture to a consumable format
void DecodeToStaging(const Texture& texture, const StagingData& staging);
void DecodeToStaging(CustomTexture& texture, StagingData& staging);
bool CompatibilityMode() const noexcept {
return compatibility_mode;
@@ -67,15 +71,16 @@ public:
private:
/// Fills the texture structure with information from the file in path
void LoadTexture(Texture& texture);
void QueryTexture(CustomTexture& texture);
private:
Core::System& system;
Common::ThreadWorker workers;
std::unique_ptr<Common::ThreadWorker> workers;
std::unordered_set<u64> dumped_textures;
std::unordered_map<u64, Texture> custom_textures;
std::unordered_map<u64, CustomTexture*> custom_texture_map;
std::vector<std::unique_ptr<CustomTexture>> custom_textures;
std::vector<u8> temp_buffer;
Texture dummy_texture{};
CustomTexture dummy_texture{};
bool textures_loaded{};
bool compatibility_mode{true};
};

View File

@@ -966,7 +966,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
const u32 level = surface.LevelOf(load_info.addr);
const bool is_base_level = level == 0;
const u64 hash = custom_tex_manager.ComputeHash(load_info, upload_data);
const Texture& texture = custom_tex_manager.GetTexture(hash);
CustomTexture& texture = custom_tex_manager.GetTexture(hash);
// The old texture pack system did not support mipmaps so older packs might do
// wonky things. For example many packs have mipmaps larger than the base
@@ -995,7 +995,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
// Copy and decode the custom texture to the staging buffer
const u32 custom_size = static_cast<u32>(texture.staging_size);
const StagingData staging = runtime.FindStaging(custom_size, true);
StagingData staging = runtime.FindStaging(custom_size, true);
custom_tex_manager.DecodeToStaging(texture, staging);
// Upload surface

View File

@@ -4,6 +4,7 @@
#pragma once
#include <atomic>
#include <span>
#include <boost/icl/right_open_interval.hpp>
#include "common/hash.h"
@@ -84,6 +85,14 @@ struct StagingData {
u32 size = 0;
std::span<u8> mapped{};
u64 buffer_offset = 0;
const std::atomic_flag* flag{};
/// Blocks the calling thread while the attached atomic flag is still clear.
/// Does nothing when no flag is attached to this staging allocation.
void Wait() const noexcept {
    if (flag != nullptr) {
        flag->wait(false);
    }
}
};
struct TextureCubeConfig {

View File

@@ -378,6 +378,10 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, Handle());
// Wait for the buffer if a decode is pending, this isn't very optimal
// but this kind of threading is very hard in gl
staging.Wait();
const auto& tuple = alloc.tuple;
if (is_custom && custom_format != VideoCore::CustomPixelFormat::RGBA8) {
glCompressedTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom,

View File

@@ -9,7 +9,7 @@
// Include vulkan-hpp header
#define VK_ENABLE_BETA_EXTENSIONS
#define VK_NO_PROTOTYPES 1
#define VK_NO_PROTOTYPES
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_NO_STRUCT_SETTERS

View File

@@ -907,6 +907,11 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
// Wait for a decode to finish if one is pending. Normally this isn't
// needed until we actually submit the command buffer but it's safer to do it now
// to prevent the stream buffer from reclaiming our space before we are done with it.
staging.Wait();
});
runtime->upload_buffer.Commit(staging.size);