fixed_pipeline_cache: Use dirty flags to lazily update key

Use dirty flags to avoid building the pipeline key from scratch on each draw
call. This saves a bit of unnecessary work on each draw call.
This commit is contained in:
ReinUsesLisp 2021-01-19 02:04:27 -03:00
parent 95722823b9
commit 70353649d7
7 changed files with 103 additions and 56 deletions

View File

@ -12,14 +12,15 @@
#include "common/cityhash.h" #include "common/cityhash.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
namespace Vulkan { namespace Vulkan {
namespace { namespace {
constexpr std::size_t POINT = 0; constexpr size_t POINT = 0;
constexpr std::size_t LINE = 1; constexpr size_t LINE = 1;
constexpr std::size_t POLYGON = 2; constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points POINT, // Points
LINE, // Lines LINE, // Lines
@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
} // Anonymous namespace } // Anonymous namespace
void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
const std::array enabled_lut = {regs.polygon_offset_point_enable, bool has_extended_dynamic_state) {
regs.polygon_offset_line_enable, const Maxwell& regs = maxwell3d.regs;
regs.polygon_offset_fill_enable}; const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0; raw1 = 0;
@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
raw2 = 0; raw2 = 0;
const auto test_func = const auto test_func =
regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
alpha_test_func.Assign(PackComparisonOp(test_func)); alpha_test_func.Assign(PackComparisonOp(test_func));
early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
point_size = Common::BitCast<u32>(regs.point_size); point_size = Common::BitCast<u32>(regs.point_size);
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
binding_divisors[index] = maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
}
} }
if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
const auto& input = regs.vertex_attrib_format[index]; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
auto& attribute = attributes[index]; const auto& input = regs.vertex_attrib_format[index];
attribute.raw = 0; auto& attribute = attributes[index];
attribute.enabled.Assign(input.IsConstant() ? 0 : 1); attribute.raw = 0;
attribute.buffer.Assign(input.buffer); attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
attribute.offset.Assign(input.offset); attribute.buffer.Assign(input.buffer);
attribute.type.Assign(static_cast<u32>(input.type.Value())); attribute.offset.Assign(input.offset);
attribute.size.Assign(static_cast<u32>(input.size.Value())); attribute.type.Assign(static_cast<u32>(input.type.Value()));
attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); attribute.size.Assign(static_cast<u32>(input.size.Value()));
}
} }
if (maxwell3d.dirty.flags[Dirty::Blending]) {
for (std::size_t index = 0; index < std::size(attachments); ++index) { maxwell3d.dirty.flags[Dirty::Blending] = false;
attachments[index].Fill(regs, index); for (size_t index = 0; index < attachments.size(); ++index) {
attachments[index].Refresh(regs, index);
}
}
if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) {
maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false;
const auto& transform = regs.viewport_transform;
std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) {
return static_cast<u16>(viewport.swizzle.raw);
});
} }
const auto& transform = regs.viewport_transform;
std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
[](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
if (!has_extended_dynamic_state) { if (!has_extended_dynamic_state) {
no_extended_dynamic_state.Assign(1); no_extended_dynamic_state.Assign(1);
dynamic_state.Fill(regs); dynamic_state.Refresh(regs);
} }
} }
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
raw = 0; raw = 0;
@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
enable.Assign(1); enable.Assign(1);
} }
void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.front_face); u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) { if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face // Flip front face
@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
}); });
} }
std::size_t FixedPipelineState::Hash() const noexcept { size_t FixedPipelineState::Hash() const noexcept {
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
return static_cast<std::size_t>(hash); return static_cast<size_t>(hash);
} }
bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {

View File

@ -58,7 +58,7 @@ struct FixedPipelineState {
BitField<30, 1, u32> enable; BitField<30, 1, u32> enable;
}; };
void Fill(const Maxwell& regs, std::size_t index); void Refresh(const Maxwell& regs, size_t index);
constexpr std::array<bool, 4> Mask() const noexcept { constexpr std::array<bool, 4> Mask() const noexcept {
return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
@ -96,8 +96,6 @@ struct FixedPipelineState {
BitField<6, 14, u32> offset; BitField<6, 14, u32> offset;
BitField<20, 3, u32> type; BitField<20, 3, u32> type;
BitField<23, 6, u32> size; BitField<23, 6, u32> size;
// Not really an element of a vertex attribute, but it can be packed here
BitField<29, 1, u32> binding_index_enabled;
constexpr Maxwell::VertexAttribute::Type Type() const noexcept { constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@ -108,7 +106,7 @@ struct FixedPipelineState {
} }
}; };
template <std::size_t Position> template <size_t Position>
union StencilFace { union StencilFace {
BitField<Position + 0, 3, u32> action_stencil_fail; BitField<Position + 0, 3, u32> action_stencil_fail;
BitField<Position + 3, 3, u32> action_depth_fail; BitField<Position + 3, 3, u32> action_depth_fail;
@ -152,7 +150,7 @@ struct FixedPipelineState {
// Vertex stride is a 12 bits value, we have 4 bits to spare per element // Vertex stride is a 12 bits value, we have 4 bits to spare per element
std::array<u16, Maxwell::NumVertexArrays> vertex_strides; std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
void Fill(const Maxwell& regs); void Refresh(const Maxwell& regs);
Maxwell::ComparisonOp DepthTestFunc() const noexcept { Maxwell::ComparisonOp DepthTestFunc() const noexcept {
return UnpackComparisonOp(depth_test_func); return UnpackComparisonOp(depth_test_func);
@ -199,9 +197,9 @@ struct FixedPipelineState {
std::array<u16, Maxwell::NumViewports> viewport_swizzles; std::array<u16, Maxwell::NumViewports> viewport_swizzles;
DynamicState dynamic_state; DynamicState dynamic_state;
void Fill(const Maxwell& regs, bool has_extended_dynamic_state); void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
std::size_t Hash() const noexcept; size_t Hash() const noexcept;
bool operator==(const FixedPipelineState& rhs) const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept;
@ -209,8 +207,8 @@ struct FixedPipelineState {
return !operator==(rhs); return !operator==(rhs);
} }
std::size_t Size() const noexcept { size_t Size() const noexcept {
const std::size_t total_size = sizeof *this; const size_t total_size = sizeof *this;
return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
} }
}; };
@ -224,7 +222,7 @@ namespace std {
template <> template <>
struct hash<Vulkan::FixedPipelineState> { struct hash<Vulkan::FixedPipelineState> {
std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
return k.Hash(); return k.Hash();
} }
}; };

View File

@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (state.attributes[index].binding_index_enabled == 0) {
continue;
}
const bool instanced = state.binding_divisors[index] != 0; const bool instanced = state.binding_divisors[index] != 0;
const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
vertex_bindings.push_back({ vertex_bindings.push_back({

View File

@ -267,8 +267,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters(); query_cache.UpdateCounters();
GraphicsPipelineCacheKey key; graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
@ -276,14 +275,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
const auto shaders = pipeline_cache.GetShaders(); const auto shaders = pipeline_cache.GetShaders();
key.shaders = GetShaderAddresses(shaders); graphics_key.shaders = GetShaderAddresses(shaders);
graphics_key.shaders = GetShaderAddresses(shaders);
SetupShaderDescriptors(shaders, is_indexed); SetupShaderDescriptors(shaders, is_indexed);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
key.renderpass = framebuffer->RenderPass(); graphics_key.renderpass = framebuffer->RenderPass();
auto* const pipeline = VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); graphics_key, framebuffer->NumColorBuffers(), async_shaders);
if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready. // Async graphics pipeline was not ready.
return; return;

View File

@ -20,6 +20,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -173,6 +174,8 @@ private:
VKUpdateDescriptorQueue update_descriptor_queue; VKUpdateDescriptorQueue update_descriptor_queue;
BlitImageHelper blit_image; BlitImageHelper blit_image;
GraphicsPipelineCacheKey graphics_key;
TextureCacheRuntime texture_cache_runtime; TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache; TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime; BufferCacheRuntime buffer_cache_runtime;

View File

@ -18,9 +18,7 @@
#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32)))
namespace Vulkan { namespace Vulkan {
namespace { namespace {
using namespace Dirty; using namespace Dirty;
using namespace VideoCommon::Dirty; using namespace VideoCommon::Dirty;
using Tegra::Engines::Maxwell3D; using Tegra::Engines::Maxwell3D;
@ -128,6 +126,34 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
tables[0][OFF(stencil_enable)] = StencilTestEnable; tables[0][OFF(stencil_enable)] = StencilTestEnable;
} }
// Tags the table-0 entries of every blending-related register with the
// Blending dirty flag index. All writes store the same value, so the order of
// the statements below does not matter.
void SetupDirtyBlending(Tables& tables) {
    auto& table = tables[0];

    // Register ranges: color masks plus the common and independent blend blocks.
    FillBlock(table, OFF(color_mask), NUM(color_mask), Blending);
    FillBlock(table, OFF(blend), NUM(blend), Blending);
    FillBlock(table, OFF(independent_blend), NUM(independent_blend), Blending);

    // Individual registers that select between common and per-target state.
    table[OFF(color_mask_common)] = Blending;
    table[OFF(independent_blend_enable)] = Blending;
}
// Tags, for each vertex array, its instanced_arrays entry and one word of its
// vertex_array entry with the InstanceDivisors dirty flag index in table 0.
void SetupDirtyInstanceDivisors(Tables& tables) {
    // NOTE(review): presumably the u32 index of `divisor` inside one
    // vertex_array entry - confirm against the register layout.
    static constexpr size_t divisor_offset = 3;

    auto& table = tables[0];
    // Size of a single vertex_array entry, measured in u32 registers.
    const size_t entry_words = NUM(vertex_array[0]);

    // Walk the divisor register of each entry by stepping one entry at a time.
    size_t divisor_reg = OFF(vertex_array) + divisor_offset;
    for (size_t array = 0; array < Regs::NumVertexArrays; ++array) {
        table[OFF(instanced_arrays) + array] = InstanceDivisors;
        table[divisor_reg] = InstanceDivisors;
        divisor_reg += entry_words;
    }
}
// Tags the whole vertex_attrib_format register range in table 0 with the
// VertexAttributes dirty flag index.
void SetupDirtyVertexAttributes(Tables& tables) {
    auto& table = tables[0];
    const auto first_reg = OFF(vertex_attrib_format);
    const auto reg_count = NUM(vertex_attrib_format);
    FillBlock(table, first_reg, reg_count, VertexAttributes);
}
// Tags one word of every viewport_transform entry in table 0 with the
// ViewportSwizzles dirty flag index.
void SetupDirtyViewportSwizzles(Tables& tables) {
    // NOTE(review): presumably the u32 index of `swizzle` inside one
    // viewport_transform entry - confirm against the register layout.
    static constexpr size_t swizzle_offset = 6;

    auto& table = tables[0];
    // Size of a single viewport_transform entry, measured in u32 registers.
    const size_t entry_words = NUM(viewport_transform[0]);

    // Step from one entry's swizzle register to the next.
    size_t swizzle_reg = OFF(viewport_transform) + swizzle_offset;
    for (size_t viewport = 0; viewport < Regs::NumViewports; ++viewport) {
        table[swizzle_reg] = ViewportSwizzles;
        swizzle_reg += entry_words;
    }
}
} // Anonymous namespace } // Anonymous namespace
StateTracker::StateTracker(Tegra::GPU& gpu) StateTracker::StateTracker(Tegra::GPU& gpu)
@ -148,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyFrontFace(tables); SetupDirtyFrontFace(tables);
SetupDirtyStencilOp(tables); SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables); SetupDirtyStencilTestEnable(tables);
SetupDirtyBlending(tables);
SetupDirtyInstanceDivisors(tables);
SetupDirtyVertexAttributes(tables);
SetupDirtyViewportSwizzles(tables);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -35,6 +35,11 @@ enum : u8 {
StencilOp, StencilOp,
StencilTestEnable, StencilTestEnable,
Blending,
InstanceDivisors,
VertexAttributes,
ViewportSwizzles,
Last Last
}; };
static_assert(Last <= std::numeric_limits<u8>::max()); static_assert(Last <= std::numeric_limits<u8>::max());