shader/registry: Store graphics and compute metadata
Store information GLSL forces us to provide but it's dynamic state in hardware (workgroup sizes, primitive topology, shared memory size).
This commit is contained in:
parent
e8efd5a901
commit
0528be5c92
|
@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
|
||||||
|
|
||||||
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
||||||
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
|
||||||
auto registry = std::make_shared<Registry>(entry.type, guest_profile);
|
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
|
||||||
registry->SetBoundBuffer(entry.bound_buffer);
|
entry.graphics_info, entry.compute_info};
|
||||||
|
const auto registry = std::make_shared<Registry>(entry.type, info);
|
||||||
for (const auto& [address, value] : entry.keys) {
|
for (const auto& [address, value] : entry.keys) {
|
||||||
const auto [buffer, offset] = address;
|
const auto [buffer, offset] = address;
|
||||||
registry->InsertKey(buffer, offset, value);
|
registry->InsertKey(buffer, offset, value);
|
||||||
|
@ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
||||||
|
|
||||||
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
|
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
|
||||||
u64 unique_identifier, const ShaderIR& ir,
|
u64 unique_identifier, const ShaderIR& ir,
|
||||||
bool hint_retrievable = false) {
|
const Registry& registry, bool hint_retrievable = false) {
|
||||||
LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type));
|
LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type));
|
||||||
const std::string glsl = DecompileShader(device, ir, shader_type);
|
const std::string glsl = DecompileShader(device, ir, registry, shader_type);
|
||||||
OGLShader shader;
|
OGLShader shader;
|
||||||
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
|
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
|
||||||
|
|
||||||
|
@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
// if (!code_b.empty()) {
|
// if (!code_b.empty()) {
|
||||||
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
|
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
|
||||||
// }
|
// }
|
||||||
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir);
|
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
|
||||||
|
|
||||||
ShaderDiskCacheEntry entry;
|
ShaderDiskCacheEntry entry;
|
||||||
entry.type = shader_type;
|
entry.type = shader_type;
|
||||||
|
@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
entry.code_b = std::move(code_b);
|
entry.code_b = std::move(code_b);
|
||||||
entry.unique_identifier = params.unique_identifier;
|
entry.unique_identifier = params.unique_identifier;
|
||||||
entry.bound_buffer = registry->GetBoundBuffer();
|
entry.bound_buffer = registry->GetBoundBuffer();
|
||||||
|
entry.graphics_info = registry->GetGraphicsInfo();
|
||||||
entry.keys = registry->GetKeys();
|
entry.keys = registry->GetKeys();
|
||||||
entry.bound_samplers = registry->GetBoundSamplers();
|
entry.bound_samplers = registry->GetBoundSamplers();
|
||||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||||
|
@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
|
|
||||||
auto registry =
|
auto& engine = params.system.GPU().KeplerCompute();
|
||||||
std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute());
|
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
|
||||||
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||||
auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir);
|
const u64 uid = params.unique_identifier;
|
||||||
|
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
|
||||||
|
|
||||||
ShaderDiskCacheEntry entry;
|
ShaderDiskCacheEntry entry;
|
||||||
entry.type = ShaderType::Compute;
|
entry.type = ShaderType::Compute;
|
||||||
entry.code = std::move(code);
|
entry.code = std::move(code);
|
||||||
entry.unique_identifier = params.unique_identifier;
|
entry.unique_identifier = uid;
|
||||||
entry.bound_buffer = registry->GetBoundBuffer();
|
entry.bound_buffer = registry->GetBoundBuffer();
|
||||||
|
entry.compute_info = registry->GetComputeInfo();
|
||||||
entry.keys = registry->GetKeys();
|
entry.keys = registry->GetKeys();
|
||||||
entry.bound_samplers = registry->GetBoundSamplers();
|
entry.bound_samplers = registry->GetBoundSamplers();
|
||||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||||
|
@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto& entry = (*transferable)[i];
|
const auto& entry = (*transferable)[i];
|
||||||
const u64 unique_identifier = entry.unique_identifier;
|
const u64 uid = entry.unique_identifier;
|
||||||
const auto it = find_precompiled(unique_identifier);
|
const auto it = find_precompiled(uid);
|
||||||
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
|
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
|
||||||
|
|
||||||
const bool is_compute = entry.type == ShaderType::Compute;
|
const bool is_compute = entry.type == ShaderType::Compute;
|
||||||
|
@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
}
|
}
|
||||||
if (!program) {
|
if (!program) {
|
||||||
// Otherwise compile it from GLSL
|
// Otherwise compile it from GLSL
|
||||||
program = BuildShader(device, entry.type, unique_identifier, ir, true);
|
program = BuildShader(device, entry.type, uid, ir, *registry, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
PrecompiledShader shader;
|
PrecompiledShader shader;
|
||||||
|
|
|
@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode;
|
||||||
using Tegra::Shader::IpaMode;
|
using Tegra::Shader::IpaMode;
|
||||||
using Tegra::Shader::IpaSampleMode;
|
using Tegra::Shader::IpaSampleMode;
|
||||||
using Tegra::Shader::Register;
|
using Tegra::Shader::Register;
|
||||||
|
using VideoCommon::Shader::Registry;
|
||||||
|
|
||||||
using namespace std::string_literals;
|
using namespace std::string_literals;
|
||||||
using namespace VideoCommon::Shader;
|
using namespace VideoCommon::Shader;
|
||||||
|
@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Describes primitive behavior on geometry shaders
|
||||||
|
std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
|
||||||
|
switch (topology) {
|
||||||
|
case Maxwell::PrimitiveTopology::Points:
|
||||||
|
return {"points", 1};
|
||||||
|
case Maxwell::PrimitiveTopology::Lines:
|
||||||
|
case Maxwell::PrimitiveTopology::LineStrip:
|
||||||
|
return {"lines", 2};
|
||||||
|
case Maxwell::PrimitiveTopology::LinesAdjacency:
|
||||||
|
case Maxwell::PrimitiveTopology::LineStripAdjacency:
|
||||||
|
return {"lines_adjacency", 4};
|
||||||
|
case Maxwell::PrimitiveTopology::Triangles:
|
||||||
|
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||||
|
case Maxwell::PrimitiveTopology::TriangleFan:
|
||||||
|
return {"triangles", 3};
|
||||||
|
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
|
||||||
|
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
|
||||||
|
return {"triangles_adjacency", 6};
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
|
||||||
|
return {"points", 1};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Generates code to use for a swizzle operation.
|
/// Generates code to use for a swizzle operation.
|
||||||
constexpr const char* GetSwizzle(std::size_t element) {
|
constexpr const char* GetSwizzle(std::size_t element) {
|
||||||
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
|
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
|
||||||
|
@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||||
|
|
||||||
class GLSLDecompiler final {
|
class GLSLDecompiler final {
|
||||||
public:
|
public:
|
||||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
|
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
std::string_view suffix)
|
ShaderType stage, std::string_view suffix)
|
||||||
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
|
: device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix},
|
||||||
|
header{ir.GetHeader()} {}
|
||||||
|
|
||||||
void Decompile() {
|
void Decompile() {
|
||||||
DeclareHeader();
|
DeclareHeader();
|
||||||
DeclareVertex();
|
DeclareVertex();
|
||||||
DeclareGeometry();
|
DeclareGeometry();
|
||||||
DeclareFragment();
|
DeclareFragment();
|
||||||
|
DeclareCompute();
|
||||||
DeclareRegisters();
|
DeclareRegisters();
|
||||||
DeclareCustomVariables();
|
DeclareCustomVariables();
|
||||||
DeclarePredicates();
|
DeclarePredicates();
|
||||||
|
@ -489,9 +516,15 @@ private:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto& info = registry.GetGraphicsInfo();
|
||||||
|
const auto input_topology = info.primitive_topology;
|
||||||
|
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
|
||||||
|
max_input_vertices = max_vertices;
|
||||||
|
code.AddLine("layout ({}) in;", glsl_topology);
|
||||||
|
|
||||||
const auto topology = GetTopologyName(header.common3.output_topology);
|
const auto topology = GetTopologyName(header.common3.output_topology);
|
||||||
const auto max_vertices = header.common4.max_output_vertices.Value();
|
const auto max_output_vertices = header.common4.max_output_vertices.Value();
|
||||||
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
|
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
|
||||||
code.AddNewLine();
|
code.AddNewLine();
|
||||||
|
|
||||||
code.AddLine("in gl_PerVertex {{");
|
code.AddLine("in gl_PerVertex {{");
|
||||||
|
@ -513,7 +546,8 @@ private:
|
||||||
if (!IsRenderTargetEnabled(render_target)) {
|
if (!IsRenderTargetEnabled(render_target)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target);
|
code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target,
|
||||||
|
render_target);
|
||||||
any = true;
|
any = true;
|
||||||
}
|
}
|
||||||
if (any) {
|
if (any) {
|
||||||
|
@ -521,6 +555,20 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DeclareCompute() {
|
||||||
|
if (stage != ShaderType::Compute) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const auto& info = registry.GetComputeInfo();
|
||||||
|
if (const u32 size = info.shared_memory_size_in_words; size > 0) {
|
||||||
|
code.AddLine("shared uint smem[];", size);
|
||||||
|
code.AddNewLine();
|
||||||
|
}
|
||||||
|
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
|
||||||
|
info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
|
||||||
|
code.AddNewLine();
|
||||||
|
}
|
||||||
|
|
||||||
void DeclareVertexRedeclarations() {
|
void DeclareVertexRedeclarations() {
|
||||||
code.AddLine("out gl_PerVertex {{");
|
code.AddLine("out gl_PerVertex {{");
|
||||||
++code.scope;
|
++code.scope;
|
||||||
|
@ -596,18 +644,16 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeclareLocalMemory() {
|
void DeclareLocalMemory() {
|
||||||
|
u64 local_memory_size = 0;
|
||||||
if (stage == ShaderType::Compute) {
|
if (stage == ShaderType::Compute) {
|
||||||
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
|
local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
|
||||||
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
|
} else {
|
||||||
code.AddLine("#endif");
|
local_memory_size = header.GetLocalMemorySize();
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const u64 local_memory_size = header.GetLocalMemorySize();
|
|
||||||
if (local_memory_size == 0) {
|
if (local_memory_size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
|
const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
|
||||||
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
|
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
|
||||||
code.AddNewLine();
|
code.AddNewLine();
|
||||||
}
|
}
|
||||||
|
@ -996,7 +1042,8 @@ private:
|
||||||
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
|
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
|
||||||
// set an 0x80000000 index for those and the shader fails to build. Find out why
|
// set an 0x80000000 index for those and the shader fails to build. Find out why
|
||||||
// this happens and what's its intent.
|
// this happens and what's its intent.
|
||||||
return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
|
return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
|
||||||
|
max_input_vertices.value());
|
||||||
}
|
}
|
||||||
return std::string(name);
|
return std::string(name);
|
||||||
};
|
};
|
||||||
|
@ -2428,11 +2475,14 @@ private:
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
const ShaderIR& ir;
|
const ShaderIR& ir;
|
||||||
|
const Registry& registry;
|
||||||
const ShaderType stage;
|
const ShaderType stage;
|
||||||
const std::string_view suffix;
|
const std::string_view suffix;
|
||||||
const Header header;
|
const Header header;
|
||||||
|
|
||||||
ShaderWriter code;
|
ShaderWriter code;
|
||||||
|
|
||||||
|
std::optional<u32> max_input_vertices;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string GetFlowVariable(u32 index) {
|
std::string GetFlowVariable(u32 index) {
|
||||||
|
@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||||
return entries;
|
return entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage,
|
std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
std::string_view suffix) {
|
ShaderType stage, std::string_view suffix) {
|
||||||
GLSLDecompiler decompiler(device, ir, stage, suffix);
|
GLSLDecompiler decompiler(device, ir, registry, stage, suffix);
|
||||||
decompiler.Decompile();
|
decompiler.Decompile();
|
||||||
return decompiler.GetResult();
|
return decompiler.GetResult();
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,12 +12,9 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/shader/registry.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
|
||||||
class ShaderIR;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
class Device;
|
class Device;
|
||||||
|
@ -80,6 +77,7 @@ struct ShaderEntries {
|
||||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
|
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||||
|
|
||||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
|
const VideoCommon::Shader::Registry& registry,
|
||||||
Tegra::Engines::ShaderType stage, std::string_view suffix = {});
|
Tegra::Engines::ShaderType stage, std::string_view suffix = {});
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -48,7 +48,7 @@ struct BindlessSamplerKey {
|
||||||
Tegra::Engines::SamplerDescriptor sampler;
|
Tegra::Engines::SamplerDescriptor sampler;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 NativeVersion = 16;
|
constexpr u32 NativeVersion = 17;
|
||||||
|
|
||||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||||
ShaderCacheVersionHash hash{};
|
ShaderCacheVersionHash hash{};
|
||||||
|
@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_texture_handler_size_known;
|
u8 is_texture_handler_size_known;
|
||||||
u32 texture_handler_size_value;
|
u32 texture_handler_size_value;
|
||||||
u32 num_keys;
|
u32 num_keys;
|
||||||
u32 num_bound_samplers;
|
u32 num_bound_samplers;
|
||||||
u32 num_bindless_samplers;
|
u32 num_bindless_samplers;
|
||||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||||
file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
|
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||||
|
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
|
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
|
||||||
file.WriteObject(texture_handler_size.has_value()) != 1 ||
|
file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
|
||||||
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
|
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
|
||||||
|
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||||
|
|
|
@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry {
|
||||||
ProgramCode code_b;
|
ProgramCode code_b;
|
||||||
|
|
||||||
u64 unique_identifier = 0;
|
u64 unique_identifier = 0;
|
||||||
u32 bound_buffer = 0;
|
|
||||||
std::optional<u32> texture_handler_size;
|
std::optional<u32> texture_handler_size;
|
||||||
|
u32 bound_buffer = 0;
|
||||||
|
VideoCommon::Shader::GraphicsInfo graphics_info;
|
||||||
|
VideoCommon::Shader::ComputeInfo compute_info;
|
||||||
VideoCommon::Shader::KeyMap keys;
|
VideoCommon::Shader::KeyMap keys;
|
||||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||||
|
|
|
@ -6,21 +6,55 @@
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/engines/kepler_compute.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/shader/registry.h"
|
#include "video_core/shader/registry.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
using Tegra::Engines::ConstBufferEngineInterface;
|
||||||
using Tegra::Engines::SamplerDescriptor;
|
using Tegra::Engines::SamplerDescriptor;
|
||||||
|
using Tegra::Engines::ShaderType;
|
||||||
|
|
||||||
Registry::Registry(Tegra::Engines::ShaderType shader_stage,
|
namespace {
|
||||||
VideoCore::GuestDriverProfile stored_guest_driver_profile)
|
|
||||||
: stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {}
|
GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
|
||||||
|
if (shader_stage == ShaderType::Compute) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
|
||||||
|
|
||||||
|
GraphicsInfo info;
|
||||||
|
info.primitive_topology = graphics.regs.draw.topology;
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
|
||||||
|
if (shader_stage != ShaderType::Compute) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
|
||||||
|
const auto& launch = compute.launch_description;
|
||||||
|
|
||||||
|
ComputeInfo info;
|
||||||
|
info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
|
||||||
|
info.local_memory_size_in_words = launch.local_pos_alloc;
|
||||||
|
info.shared_memory_size_in_words = launch.shared_alloc;
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
|
||||||
|
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
|
||||||
|
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
|
||||||
|
|
||||||
Registry::Registry(Tegra::Engines::ShaderType shader_stage,
|
Registry::Registry(Tegra::Engines::ShaderType shader_stage,
|
||||||
Tegra::Engines::ConstBufferEngineInterface& engine)
|
Tegra::Engines::ConstBufferEngineInterface& engine)
|
||||||
: stage{shader_stage}, engine{&engine} {}
|
: stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
|
||||||
|
graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
|
||||||
|
shader_stage, engine)} {}
|
||||||
|
|
||||||
Registry::~Registry() = default;
|
Registry::~Registry() = default;
|
||||||
|
|
||||||
|
@ -67,18 +101,6 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<u32> Registry::ObtainBoundBuffer() {
|
|
||||||
if (bound_buffer_saved) {
|
|
||||||
return bound_buffer;
|
|
||||||
}
|
|
||||||
if (!engine) {
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
bound_buffer_saved = true;
|
|
||||||
bound_buffer = engine->GetBoundBuffer();
|
|
||||||
return bound_buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
|
void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
|
||||||
keys.insert_or_assign({buffer, offset}, value);
|
keys.insert_or_assign({buffer, offset}, value);
|
||||||
}
|
}
|
||||||
|
@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s
|
||||||
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
|
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Registry::SetBoundBuffer(u32 buffer) {
|
|
||||||
bound_buffer_saved = true;
|
|
||||||
bound_buffer = buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Registry::IsConsistent() const {
|
bool Registry::IsConsistent() const {
|
||||||
if (!engine) {
|
if (!engine) {
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -4,11 +4,16 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/hash.h"
|
#include "common/hash.h"
|
||||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/guest_driver.h"
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
|
@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescripto
|
||||||
using BindlessSamplerMap =
|
using BindlessSamplerMap =
|
||||||
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
||||||
|
|
||||||
|
struct GraphicsInfo {
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{};
|
||||||
|
};
|
||||||
|
static_assert(std::is_trivially_copyable_v<GraphicsInfo>);
|
||||||
|
|
||||||
|
struct ComputeInfo {
|
||||||
|
std::array<u32, 3> workgroup_size{};
|
||||||
|
u32 shared_memory_size_in_words = 0;
|
||||||
|
u32 local_memory_size_in_words = 0;
|
||||||
|
};
|
||||||
|
static_assert(std::is_trivially_copyable_v<ComputeInfo>);
|
||||||
|
|
||||||
|
struct SerializedRegistryInfo {
|
||||||
|
VideoCore::GuestDriverProfile guest_driver_profile;
|
||||||
|
u32 bound_buffer = 0;
|
||||||
|
GraphicsInfo graphics;
|
||||||
|
ComputeInfo compute;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Registry is a class use to interface the 3D and compute engines with the shader compiler.
|
* The Registry is a class use to interface the 3D and compute engines with the shader compiler.
|
||||||
* With it, the shader can obtain required data from GPU state and store it for disk shader
|
* With it, the shader can obtain required data from GPU state and store it for disk shader
|
||||||
|
@ -26,8 +50,7 @@ using BindlessSamplerMap =
|
||||||
*/
|
*/
|
||||||
class Registry {
|
class Registry {
|
||||||
public:
|
public:
|
||||||
explicit Registry(Tegra::Engines::ShaderType shader_stage,
|
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
|
||||||
VideoCore::GuestDriverProfile stored_guest_driver_profile);
|
|
||||||
|
|
||||||
explicit Registry(Tegra::Engines::ShaderType shader_stage,
|
explicit Registry(Tegra::Engines::ShaderType shader_stage,
|
||||||
Tegra::Engines::ConstBufferEngineInterface& engine);
|
Tegra::Engines::ConstBufferEngineInterface& engine);
|
||||||
|
@ -42,8 +65,6 @@ public:
|
||||||
|
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||||
|
|
||||||
std::optional<u32> ObtainBoundBuffer();
|
|
||||||
|
|
||||||
/// Inserts a key.
|
/// Inserts a key.
|
||||||
void InsertKey(u32 buffer, u32 offset, u32 value);
|
void InsertKey(u32 buffer, u32 offset, u32 value);
|
||||||
|
|
||||||
|
@ -53,9 +74,6 @@ public:
|
||||||
/// Inserts a bindless sampler key.
|
/// Inserts a bindless sampler key.
|
||||||
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
|
||||||
|
|
||||||
/// Set the bound buffer for this registry.
|
|
||||||
void SetBoundBuffer(u32 buffer);
|
|
||||||
|
|
||||||
/// Checks keys and samplers against engine's current const buffers.
|
/// Checks keys and samplers against engine's current const buffers.
|
||||||
/// Returns true if they are the same value, false otherwise.
|
/// Returns true if they are the same value, false otherwise.
|
||||||
bool IsConsistent() const;
|
bool IsConsistent() const;
|
||||||
|
@ -83,6 +101,18 @@ public:
|
||||||
return bound_buffer;
|
return bound_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns compute information from this shader
|
||||||
|
const GraphicsInfo& GetGraphicsInfo() const {
|
||||||
|
ASSERT(stage != Tegra::Engines::ShaderType::Compute);
|
||||||
|
return graphics_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns compute information from this shader
|
||||||
|
const ComputeInfo& GetComputeInfo() const {
|
||||||
|
ASSERT(stage == Tegra::Engines::ShaderType::Compute);
|
||||||
|
return compute_info;
|
||||||
|
}
|
||||||
|
|
||||||
/// Obtains access to the guest driver's profile.
|
/// Obtains access to the guest driver's profile.
|
||||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
|
||||||
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
|
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
|
||||||
|
@ -95,8 +125,9 @@ private:
|
||||||
KeyMap keys;
|
KeyMap keys;
|
||||||
BoundSamplerMap bound_samplers;
|
BoundSamplerMap bound_samplers;
|
||||||
BindlessSamplerMap bindless_samplers;
|
BindlessSamplerMap bindless_samplers;
|
||||||
bool bound_buffer_saved{};
|
u32 bound_buffer;
|
||||||
u32 bound_buffer{};
|
GraphicsInfo graphics_info;
|
||||||
|
ComputeInfo compute_info;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -81,14 +81,11 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
||||||
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
|
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
|
||||||
return {tracked, track};
|
return {tracked, track};
|
||||||
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
|
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
|
||||||
const auto bound_buffer = registry.ObtainBoundBuffer();
|
const u32 bound_buffer = registry.GetBoundBuffer();
|
||||||
if (!bound_buffer) {
|
if (bound_buffer != cbuf->GetIndex()) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
if (*bound_buffer != cbuf->GetIndex()) {
|
const auto pair = DecoupleIndirectRead(*operation);
|
||||||
return {};
|
|
||||||
}
|
|
||||||
auto pair = DecoupleIndirectRead(*operation);
|
|
||||||
if (!pair) {
|
if (!pair) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue