video_core: Initial port of shader cache

* Need to add backend specific loading
This commit is contained in:
emufan4568
2022-08-09 01:31:35 +03:00
parent 39ca721cac
commit 1ded25f68b
20 changed files with 3971 additions and 65 deletions

View File

@@ -184,6 +184,8 @@ template <typename T, typename V>
}
using Vec2f = Vec2<float>;
using Vec2u = Vec2<unsigned>;
using Vec2i = Vec2<int>;
template <>
inline float Vec2<float>::Length() const {
@@ -412,6 +414,8 @@ inline float Vec3<float>::Normalize() {
}
using Vec3f = Vec3<float>;
using Vec3u = Vec3<unsigned>;
using Vec3i = Vec3<int>;
template <typename T>
class Vec4 {
@@ -623,6 +627,8 @@ template <typename T, typename V>
}
using Vec4f = Vec4<float>;
using Vec4u = Vec4<unsigned>;
using Vec4i = Vec4<int>;
template <typename T>
constexpr decltype(T{} * T{} + T{} * T{}) Dot(const Vec2<T>& a, const Vec2<T>& b) {

View File

@@ -27,8 +27,17 @@ add_library(video_core STATIC
common/buffer.h
common/framebuffer.h
common/pica_types.h
common/pica_uniforms.cpp
common/pica_uniforms.h
common/pipeline_cache.cpp
common/pipeline_cache.h
common/rasterizer.cpp
common/rasterizer.h
common/rasterizer_cache.cpp
common/rasterizer_cache.h
common/shader_runtime_cache.h
common/shader_disk_cache.cpp
common/shader_disk_cache.h
common/shader_gen.cpp
common/shader_gen.h
common/shader.h

View File

@@ -39,6 +39,9 @@ public:
// Creates a backend specific sampler object
virtual SamplerHandle CreateSampler(SamplerInfo info) = 0;
// Creates a backend specific shader object
virtual ShaderHandle CreateShader(ShaderStage stage, std::string_view name, std::string source) = 0;
// Start a draw operation
virtual void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,

View File

@@ -0,0 +1,25 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/common/pica_uniforms.h"
namespace VideoCore {
/// Fills the bool, integer and float uniform arrays of this block from the PICA shader state.
/// Bools and floats are read from `setup.uniforms`, integer uniforms from `regs.int_uniforms`.
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs,
                                   const Pica::Shader::ShaderSetup& setup) {
    // Boolean uniforms: every flag is stored in its own 16-byte aligned slot
    auto bool_slot = bools.begin();
    for (const bool flag : setup.uniforms.b) {
        *bool_slot++ = BoolAligned{flag ? true : false};
    }

    // Integer uniforms: unpack the four register components into an unsigned vec4
    auto int_slot = i.begin();
    for (const auto& packed : regs.int_uniforms) {
        *int_slot++ = Common::Vec4u{packed.x.Value(), packed.y.Value(), packed.z.Value(),
                                    packed.w.Value()};
    }

    // Float uniforms: convert every component to a 32-bit float
    auto float_slot = f.begin();
    for (const auto& pica_vec : setup.uniforms.f) {
        *float_slot++ = Common::Vec4f{pica_vec.x.ToFloat32(), pica_vec.y.ToFloat32(),
                                      pica_vec.z.ToFloat32(), pica_vec.w.ToFloat32()};
    }
}
} // namespace VideoCore

View File

@@ -0,0 +1,96 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_shader.h"
#include "video_core/shader/shader.h"
namespace VideoCore {
/// Numeric identifiers for the three uniform block categories (common, vertex shader and
/// geometry shader).
/// NOTE(review): presumably used as uniform buffer binding points by the backends - confirm.
enum class UniformBindings : u32 {
Common = 0,
VertexShader = 1,
GeometryShader = 2
};
/// Per-light entry of the uniform block. Every vec3 member is explicitly aligned to 16 bytes,
/// so each one occupies a full vec4 slot; the two attenuation floats follow the last vec3.
struct LightSrc {
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/**
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
* NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
* Not following that rule will cause problems on some AMD drivers.
*/
struct UniformData {
// Scalar block: tightly packed 4-byte values
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
// One offset per lighting sampler, packed four to an ivec4
alignas(16) Common::Vec4i lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
alignas(16) Common::Vec3f fog_color;
alignas(8) Common::Vec2f proctex_noise_f;
alignas(8) Common::Vec2f proctex_noise_a;
alignas(8) Common::Vec2f proctex_noise_p;
alignas(16) Common::Vec3f lighting_global_ambient;
// Fixed array of eight light entries
LightSrc light_src[8];
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
// Last member is a vec4, as required by the note above the struct
alignas(16) Common::Vec4f clip_coef;
};
// 0x4F0 == 1264 bytes; fails to compile if a member is added, removed or re-aligned
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData structure has changed, update the structure in the shader");
/**
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
* NOTE: the same rule from UniformData also applies here.
*/
struct PicaUniformsData {
/// Fills `bools`, `i` and `f` from the given shader registers and shader setup
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
/// A single boolean uniform stored as an int padded to a 16-byte slot
struct BoolAligned {
alignas(16) int b;
};
// 16 boolean uniforms, one per 16-byte slot
std::array<BoolAligned, 16> bools;
// 4 integer uniform vectors
alignas(16) std::array<Common::Vec4u, 4> i;
// 96 float uniform vectors
alignas(16) std::array<Common::Vec4f, 96> f;
};
/// Uniform block for the vertex stage; it currently wraps only the PICA uniforms.
struct VSUniformData {
PicaUniformsData uniforms;
};
// 1856 bytes == 16 * 16 (bools) + 4 * 16 (ints) + 96 * 16 (floats)
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData structure has changed, update the structure in the shader");
} // namespace VideoCore

View File

@@ -87,19 +87,19 @@ enum class AttribType : u8 {
Float = 0,
Int = 1,
Short = 2,
Byte = 3
Byte = 3,
Ubyte = 4
};
union VertexAttribute {
u8 value = 0;
BitField<0, 2, AttribType> type;
BitField<2, 3, u8> components;
BitField<0, 3, AttribType> type;
BitField<3, 3, u8> components;
};
#pragma pack(1)
struct VertexLayout {
u8 stride = 0;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes{};
};
#pragma pack()
@@ -123,6 +123,7 @@ struct PipelineInfo {
};
#pragma pack()
// An opaque handle to a backend specific program pipeline
class PipelineBase : public IntrusivePtrEnabled<PipelineBase> {
public:
PipelineBase(PipelineType type, PipelineInfo info) :

View File

@@ -0,0 +1,317 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <thread>
#include <tuple>
#include "core/frontend/scope_acquire_context.h"
#include "video_core/common/pipeline_cache.h"
#include "video_core/common/shader.h"
#include "video_core/common/shader_gen.h"
#include "video_core/video_core.h"
namespace VideoCore {
/// Computes the identifier used to key disk cache entries: the hash of the whole PICA register
/// array, combined with the hash of the shader code when `code` is not empty.
static u64 GetUniqueIdentifier(const Pica::Regs& regs, std::span<const u32> code) {
    const u64 regs_hash =
        Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32));
    if (code.size() == 0) {
        // No program code (e.g. fragment shaders): the register hash alone is the identifier
        return regs_hash;
    }

    const u64 code_hash = Common::ComputeHash64(code.data(), code.size() * sizeof(u32));
    return Common::HashCombine(regs_hash, code_hash);
}
static auto BuildVSConfigFromRaw(const ShaderDiskCacheRaw& raw) {
Pica::Shader::ProgramCode program_code{};
Pica::Shader::SwizzleData swizzle_data{};
std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH,
program_code.begin());
std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH,
Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin());
Pica::Shader::ShaderSetup setup;
setup.program_code = program_code;
setup.swizzle_data = swizzle_data;
return std::make_tuple(PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup);
}
/// Constructs the pipeline cache. The three runtime shader caches are constructed from `backend`
/// and `generator`, and the disk cache from `backend`.
PipelineCache::PipelineCache(Frontend::EmuWindow& emu_window, std::unique_ptr<BackendBase>& backend)
: emu_window(emu_window), backend(backend), pica_vertex_shaders(backend, generator),
fixed_geometry_shaders(backend, generator), fragment_shaders(backend, generator),
disk_cache(backend) {
// TODO: `generator` is never assigned here, so it is still null after construction.
//generator = std::make_unique<ShaderGenerator
}
/// Selects the vertex shader matching the current PICA vertex shader state.
/// Newly generated shaders are also written to the disk cache (raw entry + decompiled source).
/// @return false when no valid shader handle could be obtained, true otherwise.
bool PipelineCache::UsePicaVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) {
    PicaVSConfig vs_config{regs.vs, setup};
    auto [vs_handle, generated_source] = pica_vertex_shaders.Get(vs_config, setup);
    if (!vs_handle.IsValid()) {
        return false;
    }
    current_vertex_shader = vs_handle;

    // Source is only returned for shaders that were not cached yet; nothing to save otherwise
    if (!generated_source.has_value()) {
        return true;
    }

    // Raw entries store the program code immediately followed by the swizzle data
    std::vector<u32> bytecode(setup.program_code.begin(), setup.program_code.end());
    bytecode.insert(bytecode.end(), setup.swizzle_data.begin(), setup.swizzle_data.end());

    // The identifier must be computed before the bytecode is moved into the raw entry
    const u64 shader_id = GetUniqueIdentifier(regs, bytecode);
    const ShaderDiskCacheRaw raw_entry{shader_id, ProgramType::VertexShader, regs,
                                       std::move(bytecode)};

    disk_cache.SaveRaw(raw_entry);
    disk_cache.SaveDecompiled(shader_id, generated_source.value(),
                              VideoCore::g_hw_shader_accurate_mul);
    return true;
}
/// Makes the stored trivial vertex shader the current vertex shader.
void PipelineCache::UseTrivialVertexShader() {
current_vertex_shader = trivial_vertex_shader;
}
/// Looks up the fixed-function geometry shader for the given registers and makes it current.
/// The generated source returned alongside the handle is not used here.
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
    PicaFixedGSConfig fixed_gs_config{regs};
    auto [gs_handle, generated_source] = fixed_geometry_shaders.Get(fixed_gs_config);
    current_geometry_shader = gs_handle;
}
/// Resets the current geometry shader to a default-constructed (empty) handle.
void PipelineCache::UseTrivialGeometryShader() {
current_geometry_shader = ShaderHandle{};
}
/// Selects the fragment shader matching the current PICA state and, if it was newly generated,
/// stores it in the disk cache. Fragment entries carry no program code.
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
    PicaFSConfig fs_config{regs};
    auto [fs_handle, generated_source] = fragment_shaders.Get(fs_config);
    current_fragment_shader = fs_handle;

    // Source is only returned for shaders that were not cached yet
    if (!generated_source.has_value()) {
        return;
    }

    u64 shader_id = GetUniqueIdentifier(regs, {});
    ShaderDiskCacheRaw raw_entry{shader_id, ProgramType::FragmentShader, regs, {}};
    disk_cache.SaveRaw(raw_entry);
    disk_cache.SaveDecompiled(shader_id, generated_source.value(), false);
}
void PipelineCache::LoadDiskCache(const std::atomic_bool& stop_loading, const DiskLoadCallback& callback) {
const auto transferable = disk_cache.LoadTransferable();
if (!transferable.has_value()) {
return;
}
const auto& raws = transferable.value();
// Load uncompressed precompiled file for non-separable shaders.
// Precompiled file for separable shaders is compressed.
auto [decompiled, dumps] = disk_cache.LoadPrecompiled(true);
if (stop_loading) {
return;
}
std::set<GLenum> supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
std::mutex mutex;
std::atomic_bool compilation_failed = false;
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
std::vector<std::size_t> load_raws_index;
for (u64 i = 0; i < raws.size(); i++) {
if (stop_loading || compilation_failed) {
return;
}
const ShaderDiskCacheRaw& raw = raws[i];
const u64 unique_identifier = raw.GetUniqueIdentifier();
const u64 calculated_hash = GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode());
// Check for any data corruption
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_Vulkan, "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing "
"shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateAll();
return;
}
const auto dump = dumps.find(unique_identifier);
const auto decomp = decompiled.find(unique_identifier);
ShaderHandle shader{};
if (dump != dumps.end() && decomp != decompiled.end()) {
// Only load the vertex shader if its sanitize_mul setting matches
if (raw.GetProgramType() == ProgramType::VertexShader &&
decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
continue;
}
// If the shader is dumped, attempt to load it
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (!shader.IsValid()) {
// If any shader failed, stop trying to compile, delete the cache, and start
// loading from raws
compilation_failed = true;
return;
}
// We have both the binary shader and the decompiled, so inject it into the
// cache
if (raw.GetProgramType() == ProgramType::VertexShader) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
std::scoped_lock lock(mutex);
pica_vertex_shaders.Inject(conf, decomp->second.result, std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FragmentShader) {
const PicaFSConfig conf{raw.GetRawShaderConfig()};
std::scoped_lock lock(mutex);
fragment_shaders.Inject(conf, std::move(shader));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType());
compilation_failed = true;
return;
}
} else {
// Since precompiled didn't have the dump, we'll load them in the next phase
std::scoped_lock lock(mutex);
load_raws_index.push_back(i);
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
// Invalidate the precompiled cache if a shader dumped shader was rejected
bool load_all_raws = false;
if (compilation_failed) {
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
load_all_raws = true;
}
const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size();
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size);
}
compilation_failed = false;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
const auto LoadRawSepareble = [&](Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
Frontend::ScopeAcquireContext scope(*context);
for (u64 i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;
}
const u64 raws_index = load_all_raws ? i : load_raws_index[i];
const auto& raw = raws[raws_index];
const u64 unique_identifier = raw.GetUniqueIdentifier();
bool sanitize_mul = false;
ShaderHandle shader{nullptr};
std::optional<std::string> result;
// Otherwise decompile and build the shader at boot and save the result to the
// precompiled file
if (raw.GetProgramType() == ProgramType::VertexShader) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
result = generator->GenerateVertexShader(setup, conf);
// Compile shader
shader = backend->CreateShader(ShaderStage::Vertex, "Vertex shader", result.value());
shader->Compile(ShaderOptimization::Debug);
sanitize_mul = conf.sanitize_mul;
std::scoped_lock lock(mutex);
pica_vertex_shaders.Inject(conf, result.value(), std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FragmentShader) {
const PicaFSConfig conf{raw.GetRawShaderConfig()};
result = generator->GenerateFragmentShader(conf);
// Compile shader
shader = backend->CreateShader(ShaderStage::Fragment, "Fragment shader", result.value());
shader->Compile(ShaderOptimization::Debug);
std::scoped_lock lock(mutex);
fragment_shaders.Inject(conf, std::move(shader));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_ERROR(Frontend, "Failed to load raw ProgramType {}", raw.GetProgramType());
compilation_failed = true;
return;
}
if (!shader.IsValid()) {
LOG_ERROR(Frontend, "Compilation from raw failed {:x} {:x}",
raw.GetProgramCode()[0], raw.GetProgramCode()[1]);
compilation_failed = true;
return;
}
std::scoped_lock lock(mutex);
// If this is a new separable shader, add it the precompiled cache
if (result) {
disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
disk_cache.SaveDump(unique_identifier, shader);
precompiled_cache_altered = true;
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size);
}
}
};
const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{load_raws_size / num_workers};
std::vector<std::unique_ptr<Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
disk_cache.InvalidateAll();
}
if (precompiled_cache_altered) {
disk_cache.SaveVirtualPrecompiledFile();
}
}
} // namespace VideoCore

View File

@@ -0,0 +1,75 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "video_core/regs.h"
#include "video_core/common/shader_runtime_cache.h"
#include "video_core/common/shader_disk_cache.h"
namespace FileUtil {
class IOFile;
}
namespace Core {
class System;
}
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
/// Stages reported to the disk cache load callback
enum class LoadCallbackStage : u8 {
Prepare = 0,
Decompile = 1,
Build = 2,
Complete = 3,
};
/// Progress callback for disk cache loading, invoked as (stage, current value, total value)
using DiskLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
// A class that manages and caches shaders and pipelines
class PipelineCache {
public:
PipelineCache(Frontend::EmuWindow& emu_window, std::unique_ptr<BackendBase>& backend);
~PipelineCache() = default;
// Loads backend specific shader binaries from disk
void LoadDiskCache(const std::atomic_bool& stop_loading, const DiskLoadCallback& callback);
// Selects the vertex shader for the given state; returns false when no valid shader exists
bool UsePicaVertexShader(const Pica::Regs& config, Pica::Shader::ShaderSetup& setup);
// Makes the trivial vertex shader current
void UseTrivialVertexShader();
// Selects the fixed geometry shader for the given registers
void UseFixedGeometryShader(const Pica::Regs& regs);
// Clears the current geometry shader
void UseTrivialGeometryShader();
// Compiles and caches a fragment shader based on the current pica state
void UseFragmentShader(const Pica::Regs& config);
private:
// Both are references to objects owned elsewhere
Frontend::EmuWindow& emu_window;
std::unique_ptr<BackendBase>& backend;
// Declared before the runtime caches below, which are constructed from it
std::unique_ptr<ShaderGeneratorBase> generator;
// Keeps all the compiled graphics pipelines
std::unordered_map<PipelineInfo, PipelineHandle> cached_pipelines;
// Current shaders
ShaderHandle current_vertex_shader;
ShaderHandle current_geometry_shader;
ShaderHandle current_fragment_shader;
// Pica runtime shader caches
PicaVertexShaders pica_vertex_shaders;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
ShaderHandle trivial_vertex_shader;
// Serializes shader binaries to disk
ShaderDiskCache disk_cache;
};
} // namespace VideoCore

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,244 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <vector>
#include <memory>
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/common/rasterizer_cache.h"
#include "video_core/common/pipeline.h"
#include "video_core/shader/shader.h"
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class ShaderProgramManager;
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
// Builds a hardware vertex from a PICA shader output vertex.
// NOTE(review): `flip_quaternion` presumably negates `normquat` - the definition is not visible here.
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
// Returns the pipeline vertex layout of the vertex
constexpr static VertexLayout GetVertexLayout();
// Vertex attributes, stored in declaration order
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
class BackendBase;
/// Hardware rasterizer front end: batches triangles, mirrors PICA register state into the
/// uniform block below and issues work through the referenced backend.
class Rasterizer {
public:
explicit Rasterizer(Frontend::EmuWindow& emu_window, std::unique_ptr<BackendBase>& backend);
~Rasterizer();
//void LoadDiskResources(const std::atomic_bool& stop_loading,
// const VideoCore::DiskResourceLoadCallback& callback);
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2);
void DrawTriangles();
void NotifyPicaRegisterChanged(u32 id);
void FlushAll();
void FlushRegion(PAddr addr, u32 size);
void InvalidateRegion(PAddr addr, u32 size);
void FlushAndInvalidateRegion(PAddr addr, u32 size);
void ClearAll(bool flush);
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config);
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config);
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config);
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed);
/// Syncs entire status to match PICA registers
void SyncEntireState();
private:
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index, const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
bool Draw(bool accelerate, bool is_indexed);
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
/// Vertex index range and input size reported by AnalyzeVertexArray
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u8* array_ptr, u32 buffer_offset, u32 vs_input_index_min, u32 vs_input_index_max);
private:
// Reference to the backend owned elsewhere
std::unique_ptr<BackendBase>& backend;
RasterizerCache res_cache;
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
// CPU copy of the uniform block together with its dirty flags
struct {
UniformData data;
// NOTE(review): `{true}` only initializes the first array element to true; the remaining
// elements are value-initialized to false unless they are filled elsewhere - confirm.
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{true};
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
bool dirty = true;
} uniform_block_data{};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// Clear texture for placeholder purposes
TextureHandle clear_texture;
// Uniform alignment
std::array<bool, 16> hw_vao_enabled_attributes{};
std::size_t uniform_size_aligned_vs = 0;
std::size_t uniform_size_aligned_fs = 0;
// Rasterizer used buffers (vertex, index, uniform, lut)
BufferHandle vertex_buffer, index_buffer, uniform_buffer;
BufferHandle texel_buffer_lut_lf, texel_buffer_lut;
// Pica lighting data
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec2f, 256> proctex_lut_data{};
std::array<Common::Vec2f, 256> proctex_diff_lut_data{};
// Texture unit sampler cache
SamplerInfo texture_cube_sampler;
std::array<SamplerInfo, 3> texture_samplers;
std::unordered_map<SamplerInfo, SamplerHandle> sampler_cache;
// TODO: Remove this
bool allow_shadow = false;
};
} // namespace VideoCore

View File

@@ -1671,8 +1671,9 @@ void RasterizerCache::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
// subtract after iterating
const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
if (delta > 0)
if (delta > 0) {
cached_pages.add({pages_interval, delta});
}
for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
const auto interval = pair.first & pages_interval;

View File

@@ -36,11 +36,6 @@ public:
/// Compiles the shader source code
virtual bool Compile(ShaderOptimization level) = 0;
/// Returns the API specific shader bytecode
std::string_view GetSource() const {
return source;
}
/// Returns the name given the shader module
std::string_view GetName() const {
return name;
@@ -54,7 +49,7 @@ public:
protected:
std::string_view name = "None";
ShaderStage stage = ShaderStage::Undefined;
std::string source;
std::string source = "";
};
using ShaderHandle = IntrusivePtr<ShaderBase>;

View File

@@ -0,0 +1,574 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/common/shader_disk_cache.h"
namespace VideoCore {
// Fixed 64-byte buffer holding the shader cache version string (zero padded)
using ShaderCacheVersionHash = std::array<u8, 64>;
// Entry tags stored in the transferable cache file; each entry starts with one as a u32
enum class TransferableEntryKind : u32 {
Raw = 0,
};
// Entry tags stored in the precompiled cache file
enum class PrecompiledEntryKind : u32 {
Decompiled = 0,
Dump = 1,
};
// Version of the transferable file format; older files are removed and newer files are skipped
constexpr u32 NativeVersion = 1;
/// Returns the shader cache version string copied into a zero-initialized fixed-size buffer.
/// Strings longer than the buffer are truncated to the buffer size.
ShaderCacheVersionHash GetShaderCacheVersionHash() {
    ShaderCacheVersionHash version_hash{};

    const std::size_t version_length = std::strlen(Common::g_shader_cache_version);
    const std::size_t bytes_to_copy = std::min(version_length, version_hash.size());

    std::memcpy(version_hash.data(), Common::g_shader_cache_version, bytes_to_copy);
    return version_hash;
}
/// Reads one raw entry from `file`: identifier, program type, the register array and, for
/// vertex shaders, the program code.
///
/// The element counts are read from the file itself. The register count is validated against
/// the capacity of the register array so a corrupt or crafted file cannot write past it.
///
/// @return true when the whole entry was read, false on a short read or an invalid length.
bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
    // NOTE(review): program_type is read as 4 bytes - confirm ProgramType has a 32-bit
    // underlying type.
    if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
        file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
        return false;
    }

    u64 reg_array_len{};
    if (file.ReadBytes(&reg_array_len, sizeof(u64)) != sizeof(u64)) {
        return false;
    }

    // Reject lengths that would overflow the fixed-size register array
    if (reg_array_len > static_cast<u64>(Pica::Regs::NUM_REGS)) {
        return false;
    }

    if (file.ReadArray(config.reg_array.data(), reg_array_len) != reg_array_len) {
        return false;
    }

    // Read in type specific configuration
    if (program_type == ProgramType::VertexShader) {
        u64 code_len{};
        if (file.ReadBytes(&code_len, sizeof(u64)) != sizeof(u64)) {
            return false;
        }

        // NOTE(review): code_len is not bounded here; a corrupt value leads to a very large
        // allocation before the short read is detected.
        program_code.resize(code_len);
        if (file.ReadArray(program_code.data(), code_len) != code_len) {
            return false;
        }
    }

    return true;
}
/// Writes this raw entry to `file` in the layout that Load() reads back: identifier, program
/// type, register count + registers and, for vertex shaders, code length + program code.
/// @return true when every write succeeded.
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
    const bool header_written = file.WriteObject(unique_identifier) == 1 &&
                                file.WriteObject(static_cast<u32>(program_type)) == 1;
    if (!header_written) {
        return false;
    }

    // Just for future proofing, save the sizes of the array to the file
    const std::size_t register_count = Pica::Regs::NUM_REGS;
    if (file.WriteObject(static_cast<u64>(register_count)) != 1) {
        return false;
    }
    if (file.WriteArray(config.reg_array.data(), register_count) != register_count) {
        return false;
    }

    // Only vertex shader entries carry program code
    if (program_type != ProgramType::VertexShader) {
        return true;
    }

    const std::size_t code_word_count = program_code.size();
    if (file.WriteObject(static_cast<u64>(code_word_count)) != 1) {
        return false;
    }
    return file.WriteArray(program_code.data(), code_word_count) == code_word_count;
}
/// Stores the given backend in the `backend` member; no file is touched at construction time.
ShaderDiskCache::ShaderDiskCache(std::unique_ptr<BackendBase>& backend) : backend(backend) {
}
std::optional<std::vector<ShaderDiskCacheRaw>> ShaderDiskCache::LoadTransferable() {
const bool has_title_id = GetProgramID() != 0;
if (!Settings::values.use_hw_shader || !Settings::values.use_disk_shader_cache ||
!has_title_id) {
return std::nullopt;
}
tried_to_load = true;
FileUtil::IOFile file{GetTransferablePath(), "rb"};
if (!file.IsOpen()) {
LOG_INFO(Render_Vulkan, "No transferable shader cache found for game with title id={}",
GetTitleID());
return std::nullopt;
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_Vulkan, "Failed to get transferable cache version for title id={} - skipping",
GetTitleID());
return std::nullopt;
}
if (version < NativeVersion) {
LOG_INFO(Render_Vulkan, "Transferable shader cache is old - removing");
file.Close();
InvalidateAll();
return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_Vulkan, "Transferable shader cache was generated with a newer version "
"of the emulator - skipping");
return std::nullopt;
}
// Version is valid, load the shaders
std::vector<ShaderDiskCacheRaw> raws;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
LOG_ERROR(Render_Vulkan, "Failed to read transferable file - skipping");
return std::nullopt;
}
switch (kind) {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
LOG_ERROR(Render_Vulkan, "Failed to load transferable raw entry - skipping");
return std::nullopt;
}
transferable.emplace(entry.GetUniqueIdentifier(), ShaderDiskCacheRaw{});
raws.push_back(std::move(entry));
break;
}
default:
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
kind);
return std::nullopt;
}
}
LOG_INFO(Render_OpenGL, "Found a transferable disk cache with {} entries", raws.size());
return {std::move(raws)};
}
/// Loads the current game's precompiled cache from disk.
/// @param compressed whether the file contents are ZSTD compressed
/// @returns the decompiled entries and binary dumps; both maps are empty when the cache is not
///          usable, the file cannot be opened, or parsing fails. On a parse failure the
///          precompiled cache is invalidated as well.
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
ShaderDiskCache::LoadPrecompiled(bool compressed) {
    if (!IsUsable())
        return {};

    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
    if (!file.IsOpen()) {
        LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
                 GetTitleID());
        return {};
    }

    // Deliberately not const: the parsed maps are moved out below instead of being deep-copied
    auto result = LoadPrecompiledFile(file, compressed);
    if (!result.has_value()) {
        LOG_INFO(Render_OpenGL,
                 "Failed to load precompiled cache for game with title id={} - removing",
                 GetTitleID());
        // Close our handle before the file gets deleted
        file.Close();
        InvalidatePrecompiled();
        return {};
    }
    return std::move(*result);
}
/// Parses a precompiled cache file into its decompiled entries and binary dumps.
/// The file contents (decompressed first when `compressed` is set) are appended to the virtual
/// precompiled cache and then parsed from offset zero.
/// @param file       opened precompiled cache file
/// @param compressed whether the file contents are ZSTD compressed
/// @returns the parsed maps, or std::nullopt when the file cannot be read completely, was
///          written by another emulator version, or contains a malformed entry
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
ShaderDiskCache::LoadPrecompiledFile(FileUtil::IOFile& file, bool compressed) {
    // Read the file from disk; a short read would leave zero-filled garbage in the buffer
    std::vector<u8> precompiled_file(file.GetSize());
    if (file.ReadBytes(precompiled_file.data(), precompiled_file.size()) !=
        precompiled_file.size()) {
        return std::nullopt;
    }

    // Decompress (if needed) into the virtual precompiled cache file
    if (compressed) {
        const auto decompressed = Common::Compression::DecompressDataZSTD(precompiled_file);
        SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
    } else {
        SaveArrayToPrecompiled(precompiled_file.data(), precompiled_file.size());
    }
    decompressed_precompiled_cache_offset = 0;

    ShaderCacheVersionHash file_hash{};
    if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
        return std::nullopt;
    }
    if (GetShaderCacheVersionHash() != file_hash) {
        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
        return std::nullopt;
    }

    // Number of bytes that have not been consumed from the virtual cache yet
    const auto remaining_bytes = [this]() -> std::size_t {
        const std::size_t total = decompressed_precompiled_cache.size();
        return decompressed_precompiled_cache_offset < total
                   ? total - decompressed_precompiled_cache_offset
                   : 0;
    };

    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
    ShaderDumpsMap dumps;
    while (decompressed_precompiled_cache_offset < decompressed_precompiled_cache.size()) {
        PrecompiledEntryKind kind{};
        if (!LoadObjectFromPrecompiled(kind)) {
            return std::nullopt;
        }

        switch (kind) {
        case PrecompiledEntryKind::Decompiled: {
            u64 unique_identifier{};
            if (!LoadObjectFromPrecompiled(unique_identifier)) {
                return std::nullopt;
            }

            std::optional entry = LoadDecompiledEntry();
            if (!entry) {
                return std::nullopt;
            }
            decompiled.insert({unique_identifier, std::move(*entry)});
            break;
        }
        case PrecompiledEntryKind::Dump: {
            u64 unique_identifier{};
            if (!LoadObjectFromPrecompiled(unique_identifier)) {
                return std::nullopt;
            }

            ShaderDiskCacheDump dump;
            if (!LoadObjectFromPrecompiled(dump.binary_format)) {
                return std::nullopt;
            }

            u32 binary_length{};
            if (!LoadObjectFromPrecompiled(binary_length)) {
                return std::nullopt;
            }
            // The length comes straight from disk: reject values that exceed the remaining
            // data instead of allocating and copying past the end of the buffer
            if (binary_length > remaining_bytes()) {
                return std::nullopt;
            }

            dump.binary.resize(binary_length);
            if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
                return std::nullopt;
            }
            dumps.insert({unique_identifier, std::move(dump)});
            break;
        }
        default:
            return std::nullopt;
        }
    }

    LOG_INFO(Render_OpenGL,
             "Found a precompiled disk cache with {} decompiled entries and {} binary entries",
             decompiled.size(), dumps.size());
    // Hand the maps to the caller without copying them
    return std::make_pair(std::move(decompiled), std::move(dumps));
}
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCache::LoadDecompiledEntry() {
bool sanitize_mul;
if (!LoadObjectFromPrecompiled(sanitize_mul)) {
return std::nullopt;
}
u32 code_size{};
if (!LoadObjectFromPrecompiled(code_size)) {
return std::nullopt;
}
std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return std::nullopt;
}
const ShaderDiskCacheDecompiled entry = {
.result = std::move(code),
.sanitize_mul = sanitize_mul
};
return entry;
}
/// Appends a decompiled entry (kind tag, identifier, sanitize flag, code length, code) to the
/// passed file. If any write fails the file is closed and the precompiled cache invalidated.
void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
                                           const std::string& result,
                                           bool sanitize_mul) {
    if (!IsUsable()) {
        return;
    }

    const auto entry_kind = static_cast<u32>(PrecompiledEntryKind::Decompiled);
    const auto code_length = static_cast<u32>(result.size());

    // Fields are written in order; evaluation stops at the first failing write
    const bool written = file.WriteObject(entry_kind) == 1 &&
                         file.WriteObject(unique_identifier) == 1 &&
                         file.WriteObject(sanitize_mul) == 1 &&
                         file.WriteObject(code_length) == 1 &&
                         file.WriteArray(result.data(), result.size()) == result.size();
    if (written) {
        return;
    }

    LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing");
    file.Close();
    InvalidatePrecompiled();
}
/// Appends a decompiled entry to the virtual precompiled cache.
/// @returns true when every field was stored, false as soon as one of them fails
bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, const std::string& result,
                                            bool sanitize_mul) {
    const auto entry_kind = static_cast<u32>(PrecompiledEntryKind::Decompiled);
    const auto code_length = static_cast<u32>(result.size());

    return SaveObjectToPrecompiled(entry_kind) &&
           SaveObjectToPrecompiled(unique_identifier) &&
           SaveObjectToPrecompiled(sanitize_mul) &&
           SaveObjectToPrecompiled(code_length) &&
           SaveArrayToPrecompiled(result.data(), result.size());
}
void ShaderDiskCache::InvalidateAll() {
if (!FileUtil::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
}
InvalidatePrecompiled();
}
void ShaderDiskCache::InvalidatePrecompiled() {
// Clear virtual precompiled cache file
decompressed_precompiled_cache.resize(0);
if (!FileUtil::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
}
void ShaderDiskCache::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!IsUsable())
return;
const u64 id = entry.GetUniqueIdentifier();
if (transferable.find(id) != transferable.end()) {
// The shader already exists
return;
}
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
file.Close();
InvalidateAll();
return;
}
transferable.insert({id, entry});
}
/// Stores a decompiled entry in the virtual precompiled cache, writing the version header
/// first when the cache is still empty. A failed store invalidates the precompiled cache.
void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, const std::string& code, bool sanitize_mul) {
    if (!IsUsable()) {
        return;
    }

    const bool cache_is_empty = decompressed_precompiled_cache.empty();
    if (cache_is_empty) {
        SavePrecompiledHeaderToVirtualPrecompiledCache();
    }

    const bool stored = SaveDecompiledToCache(unique_identifier, code, sanitize_mul);
    if (stored) {
        return;
    }

    LOG_ERROR(Render_OpenGL,
              "Failed to save decompiled entry to the precompiled file - removing");
    InvalidatePrecompiled();
}
// Appends a binary dump entry (kind tag, identifier, binary format, binary length, binary data)
// to the virtual precompiled cache. A failed store invalidates the precompiled cache.
// NOTE(review): the body queries an identifier named `program` through OpenGL calls, but the
// only handle passed in is `shader` and no `program` is declared in this function. This looks
// like leftover OpenGL-specific code from the port that presumably needs a backend-specific
// replacement - confirm before relying on it.
void ShaderDiskCache::SaveDump(u64 unique_identifier, ShaderHandle shader) {
if (!IsUsable())
return;
// Query the size of the program binary, then fetch the binary together with its format
GLint binary_length{};
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format{};
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
// Format and length are both stored as u32
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
!SaveObjectToPrecompiled(unique_identifier) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
unique_identifier);
InvalidatePrecompiled();
return;
}
}
// Appends a binary dump entry directly to the precompiled file on disk, followed by a
// decompiled entry with empty code that carries the sanitize_mul flag.
// NOTE(review): as in SaveDump, the body reads an undeclared `program` through OpenGL calls
// while the parameter is named `shader`; presumably this still has to be ported to the
// backend abstraction - confirm.
void ShaderDiskCache::SaveDumpToFile(u64 unique_identifier, ShaderHandle shader, bool sanitize_mul) {
if (!IsUsable())
return;
FileUtil::IOFile file = AppendPrecompiledFile();
if (!file.IsOpen())
return;
// Query the size of the program binary, then fetch the binary together with its format
GLint binary_length{};
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format{};
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
// Format and length are both written as u32
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
file.WriteArray(binary.data(), binary.size()) != binary.size()) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
unique_identifier);
InvalidatePrecompiled();
return;
}
// SaveDecompiled is used only to store the accurate multiplication setting, a better way is to
// probably change the header in SaveDump
SaveDecompiledToFile(file, unique_identifier, {}, sanitize_mul);
}
/// The cache can be used once a load has been attempted and the disk shader cache setting
/// is enabled.
bool ShaderDiskCache::IsUsable() const {
    const bool load_attempted = tried_to_load;
    return load_attempted && Settings::values.use_disk_shader_cache;
}
/// Opens the current game's transferable file for appending, writing the version number first
/// when the file is new or empty. Returns a default-constructed file on failure.
FileUtil::IOFile ShaderDiskCache::AppendTransferableFile() {
    if (!EnsureDirectories()) {
        return {};
    }

    const std::string path = GetTransferablePath();
    // Must be sampled before opening: append mode creates the file
    const bool is_new_file = !FileUtil::Exists(path);

    FileUtil::IOFile file(path, "ab");
    if (!file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", path);
        return {};
    }

    // A new or empty file still needs its version header
    const bool needs_header = is_new_file || file.GetSize() == 0;
    if (needs_header && file.WriteObject(NativeVersion) != 1) {
        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
                  path);
        return {};
    }
    return file;
}
/// Opens the current game's precompiled file for appending, writing the shader cache version
/// hash first when the file is new or empty. Returns a default-constructed file on failure.
FileUtil::IOFile ShaderDiskCache::AppendPrecompiledFile() {
    if (!EnsureDirectories()) {
        return {};
    }

    const std::string path = GetPrecompiledPath();
    // Must be sampled before opening: append mode creates the file
    const bool is_new_file = !FileUtil::Exists(path);

    FileUtil::IOFile file(path, "ab");
    if (!file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", path);
        return {};
    }

    const bool needs_header = is_new_file || file.GetSize() == 0;
    if (needs_header) {
        // A new or empty file starts with the version hash
        const auto version_hash = GetShaderCacheVersionHash();
        const auto written = file.WriteArray(version_hash.data(), version_hash.size());
        if (written != version_hash.size()) {
            LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
                      path);
            return {};
        }
    }
    return file;
}
void ShaderDiskCache::SavePrecompiledHeaderToVirtualPrecompiledCache() {
const ShaderCacheVersionHash hash = GetShaderCacheVersionHash();
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
LOG_ERROR(Render_OpenGL,
"Failed to write precompiled cache version hash to virtual precompiled cache file");
}
}
void ShaderDiskCache::SaveVirtualPrecompiledFile() {
decompressed_precompiled_cache_offset = 0;
const std::vector<u8>& compressed = Common::Compression::CompressDataZSTDDefault(
decompressed_precompiled_cache.data(), decompressed_precompiled_cache.size());
const auto precompiled_path{GetPrecompiledPath()};
FileUtil::IOFile file(precompiled_path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
return;
}
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
return;
}
}
/// Creates the shader cache directory hierarchy, outermost directory first.
/// @returns true when every directory could be created, false at the first failure
bool ShaderDiskCache::EnsureDirectories() const {
    const auto make_dir = [](const std::string& path) -> bool {
        if (FileUtil::CreateDir(path)) {
            return true;
        }
        LOG_ERROR(Render_OpenGL, "Failed to create directory={}", path);
        return false;
    };

    // Each path is only computed once the previous directory exists
    if (!make_dir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir))) {
        return false;
    }
    if (!make_dir(GetBaseDir())) {
        return false;
    }
    if (!make_dir(GetTransferableDir())) {
        return false;
    }
    if (!make_dir(GetPrecompiledDir())) {
        return false;
    }
    return make_dir(GetPrecompiledShaderDir());
}
/// Builds the sanitized path of the current game's transferable file: <transferable dir>/<title id>.bin
std::string ShaderDiskCache::GetTransferablePath() {
    std::string path = GetTransferableDir();
    path += DIR_SEP_CHR;
    path += GetTitleID();
    path += ".bin";
    return FileUtil::SanitizePath(path);
}
/// Builds the sanitized path of the current game's precompiled file: <precompiled shader dir>/<title id>.bin
std::string ShaderDiskCache::GetPrecompiledPath() {
    std::string path = GetPrecompiledShaderDir();
    path += DIR_SEP_CHR;
    path += GetTitleID();
    path += ".bin";
    return FileUtil::SanitizePath(path);
}
/// Directory holding transferable caches: the "transferable" subdirectory of the base directory
std::string ShaderDiskCache::GetTransferableDir() const {
    std::string dir = GetBaseDir();
    dir += DIR_SEP "transferable";
    return dir;
}
/// Directory holding precompiled caches: the "precompiled" subdirectory of the base directory
std::string ShaderDiskCache::GetPrecompiledDir() const {
    std::string dir = GetBaseDir();
    dir += DIR_SEP "precompiled";
    return dir;
}
/// The "separable" subdirectory of the precompiled directory
std::string ShaderDiskCache::GetPrecompiledShaderDir() const {
    std::string dir = GetPrecompiledDir();
    dir += DIR_SEP "separable";
    return dir;
}
/// Base directory of this cache: the "opengl" subdirectory of the user's shader directory
std::string ShaderDiskCache::GetBaseDir() const {
    std::string dir = FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir);
    dir += DIR_SEP "opengl";
    return dir;
}
/// Returns the program id of the running application, asking the loader for it as long as no
/// non-zero id has been cached. Returns 0 when the loader cannot provide one.
u64 ShaderDiskCache::GetProgramID() {
    if (program_id == 0) {
        // Nothing cached yet, query the application loader
        const auto status =
            Core::System::GetInstance().GetAppLoader().ReadProgramId(program_id);
        if (status != Loader::ResultStatus::Success) {
            return 0;
        }
    }
    return program_id;
}
/// Returns the program id formatted as 16 upper-case hex digits; the string is cached after
/// the first call.
std::string ShaderDiskCache::GetTitleID() {
    if (title_id.empty()) {
        title_id = fmt::format("{:016X}", GetProgramID());
    }
    return title_id;
}
} // namespace VideoCore

View File

@@ -0,0 +1,225 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <memory>
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "video_core/regs.h"
#include "video_core/common/shader.h"
namespace Core {
class System;
}
namespace FileUtil {
class IOFile;
}
namespace VideoCore {
// Identifies the shader stage a cached program belongs to.
// The underlying type is fixed to u32 and every enumerator has an explicit value.
enum class ProgramType : u32 {
VertexShader = 0,
GeometryShader = 1,
FragmentShader = 2
};
// Describes a shader as it is used by the Pica GPU: the raw program code together with the
// register state it was generated from
class ShaderDiskCacheRaw {
public:
    ShaderDiskCacheRaw() = default;

    // program_code is taken by value and moved into place, so callers passing a temporary
    // (or std::move-ing their vector) pay for no copy
    ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, Pica::Regs config,
                       std::vector<u32> program_code)
        : unique_identifier(unique_identifier), program_type(program_type), config(config),
          program_code(std::move(program_code)) {}

    // No destructor is declared on purpose: a user-declared destructor would suppress the
    // implicit move operations and turn every move of an entry into a full copy

    // Reads this entry from the passed file. Returns false on failure.
    bool Load(FileUtil::IOFile& file);

    // Writes this entry to the passed file. Returns false on failure.
    bool Save(FileUtil::IOFile& file) const;

    // Returns the unique hash of the program code and pica registers
    u64 GetUniqueIdentifier() const {
        return unique_identifier;
    }

    // Returns the shader program type
    ProgramType GetProgramType() const {
        return program_type;
    }

    // Returns an immutable span to the program code
    std::span<const u32> GetProgramCode() const {
        return program_code;
    }

    // Returns the pica register state used to generate the program code
    const Pica::Regs& GetRawShaderConfig() const {
        return config;
    }

private:
    u64 unique_identifier = 0;
    ProgramType program_type{};
    Pica::Regs config{};
    std::vector<u32> program_code{};
};
// Contains decompiled data from a shader
struct ShaderDiskCacheDecompiled {
    // Generated shader source code
    std::string result;
    // Sanitize-multiplication flag stored alongside the code; initialised so that a
    // default-constructed entry never carries an indeterminate value
    bool sanitize_mul = false;
};
// Contains an OpenGL dumped binary program
// NOTE(review): binary_format is commented out here, while ShaderDiskCache::LoadPrecompiledFile
// still reads into dump.binary_format - presumably the field has to come back with a
// backend-agnostic type (SaveDump stores the format as u32); confirm.
struct ShaderDiskCacheDump {
//GLenum binary_format;
// Raw bytes of the dumped program binary
std::vector<u8> binary;
};
// Decompiled entries keyed by the shader's unique identifier
using ShaderDecompiledMap = std::unordered_map<u64, ShaderDiskCacheDecompiled>;
// Binary program dumps keyed by the shader's unique identifier
using ShaderDumpsMap = std::unordered_map<u64, ShaderDiskCacheDump>;
class BackendBase;
/// Manages the on-disk shader caches of the current game: the transferable cache of raw
/// shaders and the precompiled cache, which is mirrored in memory as a "virtual" file.
class ShaderDiskCache {
public:
    ShaderDiskCache(std::unique_ptr<BackendBase>& backend);
    ~ShaderDiskCache() = default;

    /// Loads the transferable cache. A file written by an older version is deleted.
    std::optional<std::vector<ShaderDiskCacheRaw>> LoadTransferable();

    /// Loads current game's precompiled cache. Invalidates on failure.
    std::pair<ShaderDecompiledMap, ShaderDumpsMap> LoadPrecompiled(bool compressed);

    /// Removes the transferable (and precompiled) cache file.
    void InvalidateAll();

    /// Removes the precompiled cache file and clears the virtual precompiled cache file.
    void InvalidatePrecompiled();

    /// Saves a raw dump to the transferable file. Checks for collisions.
    void SaveRaw(const ShaderDiskCacheRaw& entry);

    /// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions.
    void SaveDecompiled(u64 unique_identifier, const std::string& code, bool sanitize_mul);

    /// Saves a dump entry to the virtual precompiled cache. Does not check for collisions.
    void SaveDump(u64 unique_identifier, ShaderHandle shader);

    /// Saves a dump entry to the precompiled file. Does not check for collisions.
    void SaveDumpToFile(u64 unique_identifier, ShaderHandle shader, bool sanitize_mul);

    /// Serializes the virtual precompiled shader cache file to the real file
    void SaveVirtualPrecompiledFile();

private:
    /// Loads the precompiled cache from the passed file. Returns empty on failure.
    std::optional<std::pair<ShaderDecompiledMap, ShaderDumpsMap>> LoadPrecompiledFile(
        FileUtil::IOFile& file, bool compressed);

    /// Loads a decompiled cache entry from the virtual precompiled cache.
    /// Returns empty on failure.
    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();

    /// Saves a decompiled entry to the passed file. Does not check for collisions.
    void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier,
                              const std::string& code, bool sanitize_mul);

    /// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions.
    bool SaveDecompiledToCache(u64 unique_identifier, const std::string& code, bool sanitize_mul);

    /// Returns if the cache can be used
    bool IsUsable() const;

    /// Opens current game's transferable file and writes its header if it doesn't exist
    FileUtil::IOFile AppendTransferableFile();

    /// Opens current game's precompiled file and writes its header if it doesn't exist
    FileUtil::IOFile AppendPrecompiledFile();

    /// Saves the precompiled header to the virtual precompiled cache
    void SavePrecompiledHeaderToVirtualPrecompiledCache();

    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;

    /// Gets current game's transferable file path
    std::string GetTransferablePath();

    /// Gets current game's precompiled file path
    std::string GetPrecompiledPath();

    /// Get user's transferable directory path
    std::string GetTransferableDir() const;

    /// Get user's precompiled directory path
    std::string GetPrecompiledDir() const;

    /// Get the subdirectory of the precompiled directory that holds the precompiled files
    std::string GetPrecompiledShaderDir() const;

    /// Get user's shader directory path
    std::string GetBaseDir() const;

    /// Get current game's title id as u64
    u64 GetProgramID();

    /// Get current game's title id
    std::string GetTitleID();

    /// Appends `length` objects of type T to the end of the virtual precompiled cache and
    /// advances the offset by the number of bytes written. Always succeeds.
    template <typename T>
    bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
        const u8* data_view = reinterpret_cast<const u8*>(data);
        const std::size_t byte_count = length * sizeof(T);
        decompressed_precompiled_cache.insert(decompressed_precompiled_cache.end(), data_view,
                                              data_view + byte_count);
        decompressed_precompiled_cache_offset += byte_count;
        return true;
    }

    /// Copies `length` objects of type T from the current offset of the virtual precompiled
    /// cache into `data` and advances the offset.
    /// Returns false, leaving `data` and the offset untouched, when the cache does not hold
    /// enough bytes; a truncated or corrupted file is thus reported instead of being read
    /// past the end of the buffer.
    template <typename T>
    bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
        const std::size_t total = decompressed_precompiled_cache.size();
        if (decompressed_precompiled_cache_offset > total) {
            return false;
        }
        // Compare element counts so that length * sizeof(T) cannot overflow
        const std::size_t remaining = total - decompressed_precompiled_cache_offset;
        if (length > remaining / sizeof(T)) {
            return false;
        }

        const std::size_t byte_count = length * sizeof(T);
        u8* data_view = reinterpret_cast<u8*>(data);
        std::copy_n(decompressed_precompiled_cache.data() + decompressed_precompiled_cache_offset,
                    byte_count, data_view);
        decompressed_precompiled_cache_offset += byte_count;
        return true;
    }

    template <typename T>
    bool SaveObjectToPrecompiled(const T& object) {
        return SaveArrayToPrecompiled(&object, 1);
    }

    /// Booleans are stored as a single u8
    bool SaveObjectToPrecompiled(bool object) {
        const auto value = static_cast<u8>(object);
        return SaveArrayToPrecompiled(&value, 1);
    }

    template <typename T>
    bool LoadObjectFromPrecompiled(T& object) {
        return LoadArrayFromPrecompiled(&object, 1);
    }

    /// Counterpart of SaveObjectToPrecompiled(bool): reads the stored u8 and converts it, so
    /// an arbitrary byte from disk is never copied into a bool object directly
    bool LoadObjectFromPrecompiled(bool& object) {
        u8 value{};
        if (!LoadArrayFromPrecompiled(&value, 1)) {
            return false;
        }
        object = value != 0;
        return true;
    }

private:
    std::unique_ptr<BackendBase>& backend;

    // Stores whole precompiled cache which will be read from or saved to the precompiled cache file
    std::vector<u8> decompressed_precompiled_cache;

    // Stores the current offset of the precompiled cache file for IO purposes
    std::size_t decompressed_precompiled_cache_offset = 0;

    // Stored transferable shaders
    std::unordered_map<u64, ShaderDiskCacheRaw> transferable;

    // Set once loading the transferable cache has been attempted
    bool tried_to_load{};

    // Lazily queried program id and its cached string form
    u64 program_id{};
    std::string title_id;
};
} // namespace VideoCore

View File

@@ -199,20 +199,20 @@ public:
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateTrivialVertexShader(bool separable_shader) = 0;
virtual std::string GenerateTrivialVertexShader() = 0;
/**
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code
*/
virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) = 0;
virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup,
const PicaVSConfig& config) = 0;
/**
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
* @returns String of the shader source code
*/
virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) = 0;
virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) = 0;
/**
* Generates the GLSL fragment shader program source code for the current Pica state
@@ -221,7 +221,7 @@ public:
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) = 0;
virtual std::string GenerateFragmentShader(const PicaFSConfig& config) = 0;
};
} // namespace VideoCore

View File

@@ -0,0 +1,118 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <memory>
#include <unordered_map>
#include "video_core/common/backend.h"
#include "video_core/common/shader_gen.h"
namespace VideoCore {
// Result of a cache lookup: the shader handle plus the generated source code. The caches in
// this file only fill in the source when they had to generate it for the lookup.
using ShaderCacheResult = std::tuple<ShaderHandle, std::optional<std::string>>;
// Pointer to a ShaderGeneratorBase member function that generates source code from a config
template <typename KeyType>
using ShaderGenerator = std::string(ShaderGeneratorBase::*)(const KeyType&);
/**
 * Cache that maps a shader config directly to a compiled shader handle. On a miss the source
 * is generated with CodeGenerator and a shader of the given stage is created by the backend.
 */
template <typename KeyType, ShaderGenerator<KeyType> CodeGenerator, ShaderStage stage>
class ShaderCache {
public:
    ShaderCache(std::unique_ptr<BackendBase>& backend,
                std::unique_ptr<ShaderGeneratorBase>& generator) :
        backend(backend), generator(generator) {}
    ~ShaderCache() = default;

    // Returns a shader handle generated from the provided config. The generated source is
    // part of the result only when the shader was created by this call.
    ShaderCacheResult Get(const KeyType& config) {
        auto [iter, new_shader] = shaders.emplace(config, ShaderHandle{});
        ShaderHandle& shader = iter->second;
        if (!new_shader) {
            return std::make_tuple(shader, std::nullopt);
        }

        std::string source = (generator.get()->*CodeGenerator)(config);
        shader = backend->CreateShader(stage, "Cached shader", source);
        shader->Compile(ShaderOptimization::Debug); // TODO: Change this

        // The source is no longer needed locally: move it into the result instead of copying
        return std::make_tuple(shader, std::move(source));
    }

    // Registers an already existing shader for the provided key. An entry that is already
    // present for the key is left untouched.
    void Inject(const KeyType& key, ShaderHandle&& shader) {
        shaders.emplace(key, std::move(shader));
    }

private:
    std::unique_ptr<BackendBase>& backend;
    std::unique_ptr<ShaderGeneratorBase>& generator;
    std::unordered_map<KeyType, ShaderHandle> shaders;
};
// Pointer to a ShaderGeneratorBase member function that generates source code from a PICA
// shader setup and a config
template <typename KeyType>
using PicaShaderGenerator = std::string (ShaderGeneratorBase::*)(const Pica::Shader::ShaderSetup&,
const KeyType&);
/**
 * This is a cache designed for shaders translated from PICA shaders. The first cache matches the
 * config structure like a normal cache does. On a cache miss, the second cache matches the
 * generated GLSL code. The cache is laid out like this because there might be leftover code in
 * the PICA shader program buffer from the previous shader, which is hashed into the config,
 * resulting in several different config values for the same shader program.
 */
template <typename KeyType, PicaShaderGenerator<KeyType> CodeGenerator, ShaderStage stage>
class ShaderDoubleCache {
public:
    ShaderDoubleCache(std::unique_ptr<BackendBase>& backend,
                      std::unique_ptr<ShaderGeneratorBase>& generator) :
        backend(backend), generator(generator) {}
    ~ShaderDoubleCache() = default;

    // Returns the shader for the provided key. The generated source is part of the result
    // only when the key was not known yet.
    ShaderCacheResult Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup) {
        if (auto map_iter = shader_map.find(key); map_iter != shader_map.end()) {
            return std::make_tuple(*map_iter->second, std::nullopt);
        }

        // Unknown config: generate the source and look it up in the source-keyed cache
        std::string program = (generator.get()->*CodeGenerator)(setup, key);
        auto [iter, new_shader] = shader_cache.emplace(program, ShaderHandle{});
        ShaderHandle& shader = iter->second;
        if (new_shader) {
            shader = backend->CreateShader(stage, "Cached shader", program);
            shader->Compile(ShaderOptimization::Debug); // TODO: Change this
        }

        // Elements of an unordered_map keep their address, so the pointer stays valid
        shader_map[key] = &shader;
        return std::make_tuple(shader, std::move(program));
    }

    // Registers an already existing shader under its source code and maps the key to it.
    // The handle that gets stored is `program`, the shader passed in by the caller.
    void Inject(const KeyType& key, std::string decomp, ShaderHandle&& program) {
        const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
        ShaderHandle& cached_shader = iter->second;
        shader_map.insert_or_assign(key, &cached_shader);
    }

private:
    std::unique_ptr<BackendBase>& backend;
    std::unique_ptr<ShaderGeneratorBase>& generator;
    // Config -> shader stored in shader_cache
    std::unordered_map<KeyType, ShaderHandle*> shader_map;
    // Generated source code -> shader
    std::unordered_map<std::string, ShaderHandle> shader_cache;
};
// Define shader cache types for convenience
// Fragment shaders: single-level cache keyed by PicaFSConfig
using FragmentShaders = ShaderCache<PicaFSConfig,
&ShaderGeneratorBase::GenerateFragmentShader,
ShaderStage::Fragment>;
// PICA vertex shaders: two-level cache keyed by PicaVSConfig, then by generated source
using PicaVertexShaders = ShaderDoubleCache<PicaVSConfig,
&ShaderGeneratorBase::GenerateVertexShader,
ShaderStage::Vertex>;
// Fixed-function geometry shaders: single-level cache keyed by PicaFixedGSConfig
using FixedGeometryShaders = ShaderCache<PicaFixedGSConfig,
&ShaderGeneratorBase::GenerateFixedGeometryShader,
ShaderStage::Geometry>;
} // namespace VideoCore

View File

@@ -66,6 +66,7 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&code_len, sizeof(u64)) != sizeof(u64)) {
return false;
}
program_code.resize(code_len);
if (file.ReadArray(program_code.data(), code_len) != code_len) {
return false;

View File

@@ -41,16 +41,17 @@ using ProgramCode = std::vector<u32>;
using ShaderDecompiledMap = std::unordered_map<u64, ShaderDiskCacheDecompiled>;
using ShaderDumpsMap = std::unordered_map<u64, ShaderDiskCacheDump>;
/// Describes a shader how it's used by the guest GPU
// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
RawShaderConfig config, ProgramCode program_code);
ShaderDiskCacheRaw() = default;
ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
Pica::Regs config, std::vector<u32> program_code) :
unique_identifier(unique_identifier), program_type(program_type), config(config),
program_code(program_code) {}
~ShaderDiskCacheRaw() = default;
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
u64 GetUniqueIdentifier() const {
@@ -61,19 +62,19 @@ public:
return program_type;
}
const ProgramCode& GetProgramCode() const {
const std::vector<u32>& GetProgramCode() const {
return program_code;
}
const RawShaderConfig& GetRawShaderConfig() const {
const Pica::Regs& GetRawShaderConfig() const {
return config;
}
private:
u64 unique_identifier{};
u64 unique_identifier = 0;
ProgramType program_type{};
RawShaderConfig config{};
ProgramCode program_code{};
Pica::Regs config{};
std::vector<u32> program_code{};
};
/// Contains decompiled data from a shader

View File

@@ -393,7 +393,7 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
// Save VS to the disk cache if its a new shader
if (result) {
auto& disk_cache = impl->disk_cache;
ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()};
std::vector<u32> program_code{setup.program_code.begin(), setup.program_code.end()};
program_code.insert(program_code.end(), setup.swizzle_data.begin(),
setup.swizzle_data.end());
const u64 unique_identifier = GetUniqueIdentifier(regs, program_code);
@@ -715,6 +715,7 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}

View File

@@ -5,6 +5,7 @@
#pragma once
#include <memory>
#include <array>
#include <glad/glad.h>
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_lighting.h"
@@ -22,39 +23,39 @@ namespace OpenGL {
enum class UniformBindings : GLuint { Common, VS, GS };
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct UniformData {
GLint framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLfloat shadow_bias_constant;
GLfloat shadow_bias_linear;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
GLint fog_lut_offset;
GLint proctex_noise_lut_offset;
GLint proctex_color_map_offset;
GLint proctex_alpha_map_offset;
GLint proctex_lut_offset;
GLint proctex_diff_lut_offset;
GLfloat proctex_bias;
GLint shadow_texture_bias;
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
alignas(16) GLivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
@@ -62,29 +63,29 @@ struct UniformData {
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
alignas(16) Common::Vec4f clip_coef;
};
static_assert(
sizeof(UniformData) == 0x4F0,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
struct BoolAligned {
alignas(16) GLint b;
alignas(16) int b;
};
GLvec4
std::array<BoolAligned, 16> bools;
alignas(16) std::array<GLuvec4, 4> i;
alignas(16) std::array<GLvec4, 96> f;
alignas(16) std::array<Common::Vec4u, 4> i;
alignas(16) std::array<Common::Vec4f, 96> f;
};
struct VSUniformData {