custom_tex_manager: Multithread custom texture loading and decode
* Each texture has an atomic flag that signals to the backend when decoding has finished. * The raw file data is no longer kept in memory, to conserve RAM.
This commit is contained in:
@@ -72,11 +72,11 @@ bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height,
|
||||
bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
|
||||
ddsktx_format& format) {
|
||||
ddsktx_texture_info tc{};
|
||||
const int size = static_cast<int>(in_data.size());
|
||||
if (!ddsktx_parse(&tc, in_data.data(), size, nullptr)) {
|
||||
const int size = static_cast<int>(dds_data.size());
|
||||
if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -85,9 +85,23 @@ bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& wi
|
||||
format = tc.format;
|
||||
|
||||
ddsktx_sub_data sub_data{};
|
||||
ddsktx_get_sub(&tc, &sub_data, in_data.data(), size, 0, 0, 0);
|
||||
ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
|
||||
decoded_size = sub_data.size_bytes;
|
||||
|
||||
out_data.resize(sub_data.size_bytes);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data) {
|
||||
ddsktx_texture_info tc{};
|
||||
const int size = static_cast<int>(dds_data.size());
|
||||
if (!ddsktx_parse(&tc, dds_data.data(), size, nullptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ddsktx_sub_data sub_data{};
|
||||
ddsktx_get_sub(&tc, &sub_data, dds_data.data(), size, 0, 0, 0);
|
||||
|
||||
ASSERT(out_data.size() == sub_data.size_bytes);
|
||||
std::memcpy(out_data.data(), sub_data.buff, sub_data.size_bytes);
|
||||
|
||||
return true;
|
||||
|
@@ -13,9 +13,11 @@ bool ParsePNG(std::span<const u8> png_data, size_t& decoded_size, u32& width, u3
|
||||
|
||||
bool DecodePNG(std::span<const u8> png_data, std::span<u8> out_data);
|
||||
|
||||
bool ParseDDSKTX(std::span<const u8> in_data, std::vector<u8>& out_data, u32& width, u32& height,
|
||||
bool ParseDDSKTX(std::span<const u8> dds_data, size_t& decoded_size, u32& width, u32& height,
|
||||
ddsktx_format& format);
|
||||
|
||||
bool LoadDDSKTX(std::span<const u8> dds_data, std::span<u8> out_data);
|
||||
|
||||
bool EncodePNG(const std::string& out_path, std::span<u8> in_data, u32 width, u32 height,
|
||||
s32 level = 6);
|
||||
|
||||
|
@@ -99,6 +99,10 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the number of entries in `threads`, i.e. how many workers this pool holds.
/// NOTE(review): the top-level `const` on a by-value return type has no effect on callers
/// and may trigger -Wignored-qualifiers; consider returning plain `std::size_t`.
const std::size_t NumWorkers() const noexcept {
|
||||
return threads.size();
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<Task> requests;
|
||||
std::mutex queue_mutex;
|
||||
|
@@ -55,9 +55,7 @@ CustomPixelFormat ToCustomPixelFormat(ddsktx_format format) {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CustomTexManager::CustomTexManager(Core::System& system_)
|
||||
: system{system_}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1,
|
||||
"Hires processing"} {}
|
||||
CustomTexManager::CustomTexManager(Core::System& system_) : system{system_} {}
|
||||
|
||||
CustomTexManager::~CustomTexManager() = default;
|
||||
|
||||
@@ -66,58 +64,91 @@ void CustomTexManager::FindCustomTextures() {
|
||||
return;
|
||||
}
|
||||
|
||||
// If custom textures aren't enabled we don't want to create the thread pool,
|
||||
// so don't do it in the constructor, do it here instead.
|
||||
workers = std::make_unique<Common::ThreadWorker>(
|
||||
std::max(std::thread::hardware_concurrency(), 2U) - 1, "Custom textures");
|
||||
|
||||
// Custom textures are currently stored as
|
||||
// [TitleID]/tex1_[width]x[height]_[64-bit hash]_[format].png
|
||||
using namespace FileUtil;
|
||||
|
||||
const u64 program_id = system.Kernel().GetCurrentProcess()->codeset->program_id;
|
||||
const std::string load_path =
|
||||
fmt::format("{}textures/{:016X}/", GetUserPath(UserPath::LoadDir), program_id);
|
||||
fmt::format("{}textures/{:016X}/", GetUserPath(FileUtil::UserPath::LoadDir), program_id);
|
||||
|
||||
// Create the directory if it did not exist
|
||||
if (!Exists(load_path)) {
|
||||
CreateFullPath(load_path);
|
||||
if (!FileUtil::Exists(load_path)) {
|
||||
FileUtil::CreateFullPath(load_path);
|
||||
}
|
||||
|
||||
FSTEntry texture_dir;
|
||||
std::vector<FSTEntry> textures;
|
||||
FileUtil::FSTEntry texture_dir;
|
||||
std::vector<FileUtil::FSTEntry> textures;
|
||||
// 64 nested folders should be plenty for most cases
|
||||
ScanDirectoryTree(load_path, texture_dir, 64);
|
||||
GetAllFilesFromNestedEntries(texture_dir, textures);
|
||||
FileUtil::ScanDirectoryTree(load_path, texture_dir, 64);
|
||||
FileUtil::GetAllFilesFromNestedEntries(texture_dir, textures);
|
||||
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
u32 format{};
|
||||
unsigned long long hash{};
|
||||
std::string ext(3, ' ');
|
||||
// Reserve space for all the textures in the folder
|
||||
const size_t num_textures = textures.size();
|
||||
custom_textures.resize(num_textures);
|
||||
|
||||
for (const FSTEntry& file : textures) {
|
||||
const std::string& path = file.physicalName;
|
||||
if (file.isDirectory || !file.virtualName.starts_with("tex1_")) {
|
||||
const auto load = [&](u32 begin, u32 end) {
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
u32 format{};
|
||||
unsigned long long hash{};
|
||||
std::string ext(3, ' ');
|
||||
|
||||
for (u32 i = begin; i < end; i++) {
|
||||
const auto& file = textures[i];
|
||||
const std::string& path = file.physicalName;
|
||||
if (file.isDirectory || !file.virtualName.starts_with("tex1_")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse the texture filename. We only really care about the hash,
|
||||
// the rest should be queried from the file itself.
|
||||
if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%s", &width, &height,
|
||||
&hash, &format, ext.data()) != 5) {
|
||||
continue;
|
||||
}
|
||||
|
||||
custom_textures[i] = std::make_unique<CustomTexture>();
|
||||
CustomTexture& texture = *custom_textures[i];
|
||||
|
||||
// Fill in relevant information
|
||||
texture.file_format = MakeFileFormat(ext);
|
||||
texture.hash = hash;
|
||||
texture.path = path;
|
||||
|
||||
// Query the file for the rest
|
||||
QueryTexture(texture);
|
||||
}
|
||||
};
|
||||
|
||||
const std::size_t num_workers{workers->NumWorkers()};
|
||||
const std::size_t bucket_size{num_textures / num_workers};
|
||||
|
||||
for (std::size_t i = 0; i < num_workers; ++i) {
|
||||
const bool is_last_worker = i + 1 == num_workers;
|
||||
const std::size_t start{bucket_size * i};
|
||||
const std::size_t end{is_last_worker ? num_textures : start + bucket_size};
|
||||
workers->QueueWork([start, end, &load]() { load(start, end); });
|
||||
}
|
||||
|
||||
workers->WaitForRequests();
|
||||
|
||||
// Assign each texture to the hash map
|
||||
for (const auto& texture : custom_textures) {
|
||||
if (!texture) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse the texture filename. We only really care about the hash,
|
||||
// the rest should be queried from the file itself.
|
||||
if (std::sscanf(file.virtualName.c_str(), "tex1_%ux%u_%llX_%u.%s", &width, &height, &hash,
|
||||
&format, ext.data()) != 5) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto [it, new_texture] = custom_textures.try_emplace(hash);
|
||||
const unsigned long long hash = texture->hash;
|
||||
auto [it, new_texture] = custom_texture_map.try_emplace(hash);
|
||||
if (!new_texture) {
|
||||
LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!", custom_textures[hash].path,
|
||||
path);
|
||||
LOG_ERROR(Render, "Textures {} and {} conflict, ignoring!",
|
||||
custom_texture_map[hash]->path, texture->path);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& texture = it->second;
|
||||
texture.file_format = MakeFileFormat(ext);
|
||||
texture.path = path;
|
||||
|
||||
// Query the required information from the file and load it.
|
||||
// Since this doesn't involve any decoding it shouldn't consume too much RAM.
|
||||
LoadTexture(texture);
|
||||
it->second = texture.get();
|
||||
}
|
||||
|
||||
textures_loaded = true;
|
||||
@@ -134,7 +165,6 @@ u64 CustomTexManager::ComputeHash(const SurfaceParams& params, std::span<u8> dat
|
||||
// this must be done...
|
||||
const auto decoded = std::span{temp_buffer.data(), decoded_size};
|
||||
DecodeTexture(params, params.addr, params.end, data, decoded);
|
||||
|
||||
return ComputeHash64(decoded.data(), decoded_size);
|
||||
}
|
||||
|
||||
@@ -185,65 +215,98 @@ void CustomTexManager::DumpTexture(const SurfaceParams& params, u32 level, std::
|
||||
EncodePNG(dump_path, decoded, width, height);
|
||||
};
|
||||
|
||||
workers.QueueWork(std::move(dump));
|
||||
workers->QueueWork(std::move(dump));
|
||||
dumped_textures.insert(data_hash);
|
||||
}
|
||||
|
||||
const Texture& CustomTexManager::GetTexture(u64 data_hash) {
|
||||
auto it = custom_textures.find(data_hash);
|
||||
if (it == custom_textures.end()) {
|
||||
CustomTexture& CustomTexManager::GetTexture(u64 data_hash) {
|
||||
auto it = custom_texture_map.find(data_hash);
|
||||
if (it == custom_texture_map.end()) {
|
||||
LOG_WARNING(Render, "Unable to find replacement for surface with hash {:016X}", data_hash);
|
||||
return dummy_texture;
|
||||
}
|
||||
|
||||
LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", it->second.path, data_hash);
|
||||
return it->second;
|
||||
CustomTexture& texture = *it->second;
|
||||
LOG_DEBUG(Render, "Assigning {} to surface with hash {:016X}", texture.path, data_hash);
|
||||
|
||||
return texture;
|
||||
}
|
||||
|
||||
void CustomTexManager::DecodeToStaging(const Texture& texture, const StagingData& staging) {
|
||||
switch (texture.file_format) {
|
||||
case CustomFileFormat::PNG:
|
||||
if (!DecodePNG(texture.data, staging.mapped)) {
|
||||
LOG_ERROR(Render, "Failed to decode png {}", texture.path);
|
||||
}
|
||||
if (compatibility_mode) {
|
||||
const u32 stride = texture.width * 4;
|
||||
FlipTexture(staging.mapped, texture.width, texture.height, stride);
|
||||
}
|
||||
break;
|
||||
case CustomFileFormat::DDS:
|
||||
case CustomFileFormat::KTX:
|
||||
// Compressed formats don't need CPU decoding
|
||||
void CustomTexManager::DecodeToStaging(CustomTexture& texture, StagingData& staging) {
|
||||
if (texture.decoded) {
|
||||
// Nothing to do here, just copy over the data
|
||||
ASSERT_MSG(staging.size == texture.staging_size,
|
||||
"Incorrect staging size for custom texture with hash {:016X}", texture.hash);
|
||||
std::memcpy(staging.mapped.data(), texture.data.data(), texture.data.size());
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
||||
// Set an atomic flag in staging data so the backend can wait until the data is finished
|
||||
staging.flag = &texture.flag;
|
||||
|
||||
const auto decode = [this, &texture, mapped = staging.mapped]() {
|
||||
// Read the file; this is potentially the most expensive step
|
||||
FileUtil::IOFile file{texture.path, "rb"};
|
||||
ScratchBuffer<u8> file_data{file.GetSize()};
|
||||
file.ReadBytes(file_data.Data(), file.GetSize());
|
||||
|
||||
// Resize the decoded data buffer
|
||||
std::vector<u8>& decoded_data = texture.data;
|
||||
decoded_data.resize(texture.staging_size);
|
||||
|
||||
// Decode
|
||||
switch (texture.file_format) {
|
||||
case CustomFileFormat::PNG:
|
||||
if (!DecodePNG(file_data.Span(), decoded_data)) {
|
||||
LOG_ERROR(Render, "Failed to decode png {}", texture.path);
|
||||
}
|
||||
if (compatibility_mode) {
|
||||
const u32 stride = texture.width * 4;
|
||||
FlipTexture(decoded_data, texture.width, texture.height, stride);
|
||||
}
|
||||
break;
|
||||
case CustomFileFormat::DDS:
|
||||
case CustomFileFormat::KTX:
|
||||
// Compressed formats don't need CPU decoding and must be pre-flipped
|
||||
LoadDDSKTX(file_data.Span(), decoded_data);
|
||||
break;
|
||||
}
|
||||
|
||||
// Copy it over to the staging memory
|
||||
texture.decoded = true;
|
||||
std::memcpy(mapped.data(), decoded_data.data(), decoded_data.size());
|
||||
|
||||
// Notify the backend that decode is done
|
||||
texture.flag.test_and_set();
|
||||
texture.flag.notify_all();
|
||||
};
|
||||
|
||||
workers->QueueWork(std::move(decode));
|
||||
}
|
||||
|
||||
void CustomTexManager::LoadTexture(Texture& texture) {
|
||||
std::vector<u8>& data = texture.data;
|
||||
|
||||
void CustomTexManager::QueryTexture(CustomTexture& texture) {
|
||||
// Read the file
|
||||
auto file = FileUtil::IOFile(texture.path, "rb");
|
||||
data.resize(file.GetSize());
|
||||
file.ReadBytes(data.data(), file.GetSize());
|
||||
FileUtil::IOFile file{texture.path, "rb"};
|
||||
ScratchBuffer<u8> data{file.GetSize()};
|
||||
file.ReadBytes(data.Data(), file.GetSize());
|
||||
|
||||
// Parse it based on the file extension
|
||||
switch (texture.file_format) {
|
||||
case CustomFileFormat::PNG:
|
||||
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too?
|
||||
if (!ParsePNG(data, texture.staging_size, texture.width, texture.height)) {
|
||||
if (!ParsePNG(data.Span(), texture.staging_size, texture.width, texture.height)) {
|
||||
LOG_ERROR(Render, "Failed to parse png file {}", texture.path);
|
||||
return;
|
||||
}
|
||||
texture.format = CustomPixelFormat::RGBA8; // Check for other formats too?
|
||||
break;
|
||||
case CustomFileFormat::DDS:
|
||||
case CustomFileFormat::KTX:
|
||||
ddsktx_format format{};
|
||||
if (!ParseDDSKTX(data, texture.data, texture.width, texture.height, format)) {
|
||||
if (!ParseDDSKTX(data.Span(), texture.staging_size, texture.width, texture.height,
|
||||
format)) {
|
||||
LOG_ERROR(Render, "Failed to parse dds/ktx file {}", texture.path);
|
||||
return;
|
||||
}
|
||||
texture.staging_size = texture.data.size();
|
||||
texture.format = ToCustomPixelFormat(format);
|
||||
break;
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
@@ -27,17 +28,20 @@ enum class CustomFileFormat : u32 {
|
||||
KTX = 2,
|
||||
};
|
||||
|
||||
struct Texture {
|
||||
struct CustomTexture {
|
||||
u32 width;
|
||||
u32 height;
|
||||
unsigned long long hash{};
|
||||
CustomPixelFormat format;
|
||||
CustomFileFormat file_format;
|
||||
std::string path;
|
||||
std::size_t staging_size;
|
||||
std::vector<u8> data;
|
||||
std::atomic_flag flag;
|
||||
bool decoded = false;
|
||||
|
||||
operator bool() const noexcept {
|
||||
return !data.empty();
|
||||
return hash != 0;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -56,10 +60,10 @@ public:
|
||||
void DumpTexture(const SurfaceParams& params, u32 level, std::span<u8> data);
|
||||
|
||||
/// Returns the custom texture handle assigned to the provided data hash
|
||||
const Texture& GetTexture(u64 data_hash);
|
||||
CustomTexture& GetTexture(u64 data_hash);
|
||||
|
||||
/// Decodes the data in texture to a consumable format
|
||||
void DecodeToStaging(const Texture& texture, const StagingData& staging);
|
||||
void DecodeToStaging(CustomTexture& texture, StagingData& staging);
|
||||
|
||||
bool CompatibilityMode() const noexcept {
|
||||
return compatibility_mode;
|
||||
@@ -67,15 +71,16 @@ public:
|
||||
|
||||
private:
|
||||
/// Fills the texture structure with information from the file in path
|
||||
void LoadTexture(Texture& texture);
|
||||
void QueryTexture(CustomTexture& texture);
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
Common::ThreadWorker workers;
|
||||
std::unique_ptr<Common::ThreadWorker> workers;
|
||||
std::unordered_set<u64> dumped_textures;
|
||||
std::unordered_map<u64, Texture> custom_textures;
|
||||
std::unordered_map<u64, CustomTexture*> custom_texture_map;
|
||||
std::vector<std::unique_ptr<CustomTexture>> custom_textures;
|
||||
std::vector<u8> temp_buffer;
|
||||
Texture dummy_texture{};
|
||||
CustomTexture dummy_texture{};
|
||||
bool textures_loaded{};
|
||||
bool compatibility_mode{true};
|
||||
};
|
||||
|
@@ -966,7 +966,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
|
||||
const u32 level = surface.LevelOf(load_info.addr);
|
||||
const bool is_base_level = level == 0;
|
||||
const u64 hash = custom_tex_manager.ComputeHash(load_info, upload_data);
|
||||
const Texture& texture = custom_tex_manager.GetTexture(hash);
|
||||
CustomTexture& texture = custom_tex_manager.GetTexture(hash);
|
||||
|
||||
// The old texture pack system did not support mipmaps so older packs might do
|
||||
// wonky things. For example many packs have mipmaps larger than the base
|
||||
@@ -995,7 +995,7 @@ bool RasterizerCache<T>::UploadCustomSurface(Surface& surface, const SurfacePara
|
||||
|
||||
// Copy and decode the custom texture to the staging buffer
|
||||
const u32 custom_size = static_cast<u32>(texture.staging_size);
|
||||
const StagingData staging = runtime.FindStaging(custom_size, true);
|
||||
StagingData staging = runtime.FindStaging(custom_size, true);
|
||||
custom_tex_manager.DecodeToStaging(texture, staging);
|
||||
|
||||
// Upload surface
|
||||
|
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <span>
|
||||
#include <boost/icl/right_open_interval.hpp>
|
||||
#include "common/hash.h"
|
||||
@@ -84,6 +85,14 @@ struct StagingData {
|
||||
u32 size = 0;
|
||||
std::span<u8> mapped{};
|
||||
u64 buffer_offset = 0;
|
||||
const std::atomic_flag* flag{};
|
||||
|
||||
/// Blocks the calling thread until the attached atomic flag is observed as set.
/// Returns immediately when no flag has been attached to this staging data.
void Wait() const noexcept {
|
||||
if (!flag) {
|
||||
// No flag attached, so there is nothing to wait for.
return;
|
||||
}
|
||||
// std::atomic_flag::wait(false) blocks while the flag still holds `false`;
// it returns once the flag has been set and the waiter is notified.
flag->wait(false);
|
||||
}
|
||||
};
|
||||
|
||||
struct TextureCubeConfig {
|
||||
|
@@ -378,6 +378,10 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, Handle());
|
||||
|
||||
// Wait for the buffer if a decode is pending, this isn't very optimal
|
||||
// but this kind of threading is very hard in gl
|
||||
staging.Wait();
|
||||
|
||||
const auto& tuple = alloc.tuple;
|
||||
if (is_custom && custom_format != VideoCore::CustomPixelFormat::RGBA8) {
|
||||
glCompressedTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, rect.left, rect.bottom,
|
||||
|
@@ -9,7 +9,7 @@
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_ENABLE_BETA_EXTENSIONS
|
||||
#define VK_NO_PROTOTYPES 1
|
||||
#define VK_NO_PROTOTYPES
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#define VULKAN_HPP_NO_STRUCT_SETTERS
|
||||
|
@@ -907,6 +907,11 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
|
||||
|
||||
// Wait for a decode to finish if one is pending. Normally this isn't
|
||||
// needed until we actually submit the command buffer but it's safer to do it now
|
||||
// to prevent the stream buffer from reclaiming our space before we are done with it.
|
||||
staging.Wait();
|
||||
});
|
||||
|
||||
runtime->upload_buffer.Commit(staging.size);
|
||||
|
Reference in New Issue
Block a user