shader: Accelerate pipeline transitions and use dirty flags for shaders

This commit is contained in:
ReinUsesLisp 2021-04-24 18:27:25 -03:00 committed by ameerj
parent 20e86fd615
commit f4ace63957
9 changed files with 114 additions and 64 deletions

View File

@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(table, OFF(zeta), NUM(zeta), flag);
}
}
// Registers the shader_config register range in dirty table 0 under the Shaders flag.
// The block passed to FillBlock starts at OFF(shader_config[0]) and its length is
// NUM(shader_config[0]) multiplied by Maxwell3D::Regs::MaxShaderProgram, i.e. it is sized
// for every shader program entry rather than only the first one.
// NOTE(review): OFF, NUM and FillBlock are defined outside this view; presumably they yield
// a register offset, a register count, and tag that many table entries -- confirm.
void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]),
NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
}
} // Anonymous namespace
void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
SetupIndexBuffer(tables);
SetupDirtyDescriptors(tables);
SetupDirtyRenderTargets(tables);
SetupDirtyShaders(tables);
}
} // namespace VideoCommon::Dirty

View File

@ -36,6 +36,8 @@ enum : u8 {
IndexBuffer,
Shaders,
LastCommonEntry,
};

View File

@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;

View File

@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
// Backend-local variant: fills dirty table 0 with the Shaders flag over the shader_config
// register range. The range starts at OFF(shader_config[0]) and spans
// NUM(shader_config[0]) * Regs::MaxShaderProgram entries.
// NOTE(review): OFF, NUM and FillBlock are not visible here; their exact semantics should be
// confirmed against their definitions.
void SetupDirtyShaders(Tables& tables) {
FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
Shaders);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);

View File

@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,
Shaders,
ClipDistances,
PolygonModes,

View File

@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache,
const FixedPipelineState& state_,
const GraphicsPipelineCacheKey& key_,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos)
: maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
: key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, scheduler{scheduler_},
update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{
std::move(stages)} {
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
pipeline_layout = builder.CreatePipelineLayout(set_layout);
descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))};
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
MakePipeline(device, render_pass);
std::lock_guard lock{build_mutex};
@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_,
}
}
// Records `transition` as a pipeline that can follow this one.
// The transition's key and its pointer are appended to transition_keys and transitions
// respectively, so both containers keep matching positions for the same entry.
// `transition` is dereferenced and therefore must not be null.
void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
    const GraphicsPipelineCacheKey& target_key{transition->key};
    transition_keys.emplace_back(target_key);
    transitions.emplace_back(transition);
}
void GraphicsPipeline::Configure(bool is_indexed) {
static constexpr size_t max_images_elements = 64;
std::array<ImageId, max_images_elements> image_view_ids;
@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) {
void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) {
FixedPipelineState::DynamicState dynamic{};
if (!device.IsExtExtendedDynamicStateSupported()) {
dynamic = state.dynamic_state;
dynamic = key.state.dynamic_state;
}
static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const bool instanced = state.binding_divisors[index] != 0;
const bool instanced = key.state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({
.binding = static_cast<u32>(index),
@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
if (instanced) {
vertex_binding_divisors.push_back({
.binding = static_cast<u32>(index),
.divisor = state.binding_divisors[index],
.divisor = key.state.binding_divisors[index],
});
}
}
static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
const auto& input_attributes = stage_infos[0].input_generics;
for (size_t index = 0; index < state.attributes.size(); ++index) {
const auto& attribute = state.attributes[index];
for (size_t index = 0; index < key.state.attributes.size(); ++index) {
const auto& attribute = key.state.attributes[index];
if (!attribute.enabled || !input_attributes[index].used) {
continue;
}
@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &input_divisor_ci;
}
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
if (!spv_modules[1] && !spv_modules[2]) {
LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
.pNext = nullptr,
.flags = 0,
.topology = input_assembly_topology,
.primitiveRestartEnable = state.primitive_restart_enable != 0 &&
.primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
const VkPipelineTessellationStateCreateInfo tessellation_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
.patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
};
VkPipelineViewportStateCreateInfo viewport_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
.pScissors = nullptr,
};
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
.pNext = nullptr,
@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
.pNext = nullptr,
.flags = 0,
.depthClampEnable =
static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
.rasterizerDiscardEnable =
static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
.polygonMode =
MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)),
MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
.cullMode = static_cast<VkCullModeFlags>(
dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
.depthBiasEnable = state.depth_bias_enable,
.depthBiasEnable = key.state.depth_bias_enable,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode),
.rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
.maxDepthBounds = 0.0f,
};
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(state)};
const size_t num_attachments{NumAttachments(key.state)};
for (size_t index = 0; index < num_attachments; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
const auto& blend{state.attachments[index]};
const auto& blend{key.state.attachments[index]};
const std::array mask{blend.Mask()};
VkColorComponentFlags write_mask{};
for (size_t i = 0; i < mask_table.size(); ++i) {

View File

@ -4,10 +4,12 @@
#pragma once
#include <algorithm>
#include <array>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
@ -20,6 +22,39 @@
namespace Vulkan {
// Key used to look up graphics pipelines: a set of shader hashes plus the fixed pipeline
// state. It is compared with operator== and hashed through Hash().
struct GraphicsPipelineCacheKey {
// Six hash slots; the pipeline cache writes each shader's unique hash here, indexed by
// shader program index.
std::array<u128, 6> unique_hashes;
// Fixed pipeline state that is part of the key.
FixedPipelineState state;
// Hash of this key (defined out of line).
size_t Hash() const noexcept;
// Equality comparison against another key (defined out of line).
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
// Negation of operator==.
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
// Size of the hash array in bytes plus whatever state.Size() reports.
// NOTE(review): presumably the number of leading bytes of the key that are meaningful for
// hashing/comparison -- confirm against FixedPipelineState::Size().
size_t Size() const noexcept {
return sizeof(unique_hashes) + state.Size();
}
};
// Compile-time guarantees on the key's representation: unique object representations,
// trivially copyable and trivially constructible.
// NOTE(review): presumably required so the key can be hashed/compared as raw bytes -- confirm
// against the Hash()/operator== definitions.
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
} // namespace Vulkan
namespace std {
// std::hash specialization for GraphicsPipelineCacheKey; it forwards to the key's own
// Hash() member so the key can serve as the key type of standard unordered containers.
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class Device;
class RenderPassCache;
class VKScheduler;
@ -35,7 +70,8 @@ public:
const Device& device, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
Common::ThreadWorker* worker_thread,
RenderPassCache& render_pass_cache, const FixedPipelineState& state,
RenderPassCache& render_pass_cache,
const GraphicsPipelineCacheKey& key,
std::array<vk::ShaderModule, NUM_STAGES> stages,
const std::array<const Shader::Info*, NUM_STAGES>& infos);
@ -47,16 +83,30 @@ public:
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
GraphicsPipeline(const GraphicsPipeline&) = delete;
void AddTransition(GraphicsPipeline* transition);
// Looks up the pipeline that corresponds to `current_key`, starting from this pipeline.
// Returns this pipeline when its own key equals `current_key`; otherwise returns the first
// recorded transition whose key equals `current_key`, or nullptr when none matches.
// transition_keys and transitions are scanned in lockstep: entry i of one belongs to
// entry i of the other.
GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
    if (key == current_key) {
        return this;
    }
    const size_t num_transitions{transition_keys.size()};
    for (size_t index = 0; index < num_transitions; ++index) {
        if (transition_keys[index] == current_key) {
            return transitions[index];
        }
    }
    return nullptr;
}
private:
void MakePipeline(const Device& device, VkRenderPass render_pass);
const GraphicsPipelineCacheKey key;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
TextureCache& texture_cache;
BufferCache& buffer_cache;
VKScheduler& scheduler;
VKUpdateDescriptorQueue& update_descriptor_queue;
const FixedPipelineState state;
std::vector<GraphicsPipelineCacheKey> transition_keys;
std::vector<GraphicsPipeline*> transitions;
std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
std::array<Shader::Info, NUM_STAGES> stage_infos;

View File

@ -21,6 +21,7 @@
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/program.h"
#include "shader_recompiler/program_header.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (!RefreshStages()) {
current_pipeline = nullptr;
return nullptr;
}
graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
if (current_pipeline) {
GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
if (next) {
current_pipeline = next;
return current_pipeline;
}
}
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
auto& pipeline{pair->second};
if (!is_new) {
return pipeline.get();
if (is_new) {
pipeline = CreateGraphicsPipeline();
}
pipeline = CreateGraphicsPipeline();
return pipeline.get();
if (current_pipeline) {
current_pipeline->AddTransition(pipeline.get());
}
current_pipeline = pipeline.get();
return current_pipeline;
}
ComputePipeline* PipelineCache::CurrentComputePipeline() {
@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
}
bool PipelineCache::RefreshStages() {
auto& dirty{maxwell3d.dirty.flags};
if (!dirty[VideoCommon::Dirty::Shaders]) {
return last_valid_shaders;
}
dirty[VideoCommon::Dirty::Shaders] = false;
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() {
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
last_valid_shaders = false;
return false;
}
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() {
shader_infos[index] = shader_info;
graphics_key.unique_hashes[index] = shader_info->unique_hash;
}
last_valid_shaders = true;
return true;
}
@ -832,8 +852,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>(
maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool,
update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules),
infos);
update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos);
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {

View File

@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>)
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
// Key used to look up graphics pipelines: a set of shader hashes plus the fixed pipeline
// state. It is compared with operator== and hashed through Hash().
struct GraphicsPipelineCacheKey {
// Six hash slots holding the unique hashes of the pipeline's shaders.
std::array<u128, 6> unique_hashes;
// Fixed pipeline state that is part of the key.
FixedPipelineState state;
// Hash of this key (defined out of line).
size_t Hash() const noexcept;
// Equality comparison against another key (defined out of line).
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
// Negation of operator==.
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
// Size of the hash array in bytes plus whatever state.Size() reports.
// NOTE(review): presumably the number of meaningful leading bytes of the key -- confirm
// against FixedPipelineState::Size().
size_t Size() const noexcept {
return sizeof(unique_hashes) + state.Size();
}
};
// Compile-time guarantees on the key's representation: unique object representations,
// trivially copyable and trivially constructible.
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
} // namespace Vulkan
namespace std {
@ -89,13 +69,6 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
}
};
// std::hash specialization for GraphicsPipelineCacheKey; it forwards to the key's own
// Hash() member so the key can serve as the key type of standard unordered containers.
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
@ -181,7 +154,10 @@ private:
TextureCache& texture_cache;
GraphicsPipelineCacheKey graphics_key{};
GraphicsPipeline* current_pipeline{};
std::array<const ShaderInfo*, 6> shader_infos{};
bool last_valid_shaders{};
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;