From 69db7d9d0d408cf0ed33ff6afbb1d37ad58a9aa9 Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Sun, 16 Oct 2022 08:17:05 +0300 Subject: [PATCH] renderer_vulkan: Implement scaled uploads and downloads * This commit includes large changes to how textures are handled. Surface is now used instead of ImageAlloc, which provides multiple benefits: automatic recycling on destruction and the ability to use the TextureRuntime interface to simplify operations * Layout tracking is also implemented, which allows transitioning of individual mip levels without errors * This fixes graphical errors in multiple games which relied on framebuffer uploads --- src/video_core/CMakeLists.txt | 3 + .../rasterizer_cache/rasterizer_cache.h | 1 + .../rasterizer_cache/surface_base.h | 2 +- .../renderer_vulkan/renderer_vulkan.cpp | 35 +- .../renderer_vulkan/vk_blit_helper.cpp | 161 +++++++ .../renderer_vulkan/vk_blit_helper.h | 39 ++ .../vk_format_reinterpreter.cpp | 8 +- .../renderer_vulkan/vk_layout_tracker.h | 80 ++++ .../renderer_vulkan/vk_rasterizer.cpp | 78 ++- .../renderer_vulkan/vk_rasterizer.h | 6 +- .../renderer_vulkan/vk_texture_runtime.cpp | 450 ++++++++++-------- .../renderer_vulkan/vk_texture_runtime.h | 72 ++- 12 files changed, 648 insertions(+), 287 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_blit_helper.cpp create mode 100644 src/video_core/renderer_vulkan/vk_blit_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_layout_tracker.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 798de651b..b9876fd18 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -84,10 +84,13 @@ add_library(video_core STATIC renderer_vulkan/pica_to_vk.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_blit_helper.cpp + renderer_vulkan/vk_blit_helper.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h renderer_vulkan/vk_format_reinterpreter.cpp 
renderer_vulkan/vk_format_reinterpreter.h + renderer_vulkan/vk_layout_tracker.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_instance.cpp diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 56155e7e4..1531099ea 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #pragma once + #include #include #include diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 07599f0c1..e755e3f9a 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -48,7 +48,7 @@ class SurfaceBase : public SurfaceParams, public std::enable_shared_from_this using Watcher = SurfaceWatcher; public: - SurfaceBase(SurfaceParams& params) : SurfaceParams{params} {} + SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {} virtual ~SurfaceBase() = default; /// Returns true when this surface can be used to fill the fill_interval of dest_surface diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index dae0ce78a..b60e5ea92 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #define GLM_FORCE_DEPTH_ZERO_TO_ONE #include #include "common/assert.h" @@ -190,11 +189,9 @@ RendererVulkan::~RendererVulkan() { } for (auto& info : screen_infos) { - const VideoCore::HostTextureTag tag = { - .format = VideoCore::PixelFormatFromGPUPixelFormat(info.texture.format), - .width = info.texture.width, - .height = info.texture.height, - .layers = 1}; + const HostTextureTag tag = {.format = info.texture.alloc.format, + .width = info.texture.width, + .height = info.texture.height}; runtime.Recycle(tag, std::move(info.texture.alloc)); } @@ -548,25 +545,21 @@ void RendererVulkan::BuildPipelines() { void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer) { - TextureInfo old_texture = texture; - texture = TextureInfo{ - .alloc = - runtime.Allocate(framebuffer.width, framebuffer.height, - VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format), - VideoCore::TextureType::Texture2D), - .width = framebuffer.width, - .height = framebuffer.height, - .format = framebuffer.color_format, - }; + TextureInfo old_texture = std::move(texture); + texture = TextureInfo{.alloc = runtime.Allocate( + framebuffer.width, framebuffer.height, + VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format), + VideoCore::TextureType::Texture2D), + .width = framebuffer.width, + .height = framebuffer.height, + .format = framebuffer.color_format}; // Recyle the old texture after allocation to avoid having duplicates of the same allocation in // the recycler if (old_texture.width != 0 && old_texture.height != 0) { - const VideoCore::HostTextureTag tag = { - .format = VideoCore::PixelFormatFromGPUPixelFormat(old_texture.format), - .width = old_texture.width, - .height = old_texture.height, - .layers = 1}; + const HostTextureTag tag = {.format = old_texture.alloc.format, + .width = old_texture.width, + .height = old_texture.height}; runtime.Recycle(tag, std::move(old_texture.alloc)); } diff --git 
a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp new file mode 100644 index 000000000..8c4a7991b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -0,0 +1,161 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/vector_math.h" +#include "video_core/renderer_vulkan/vk_blit_helper.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Vulkan { + +BlitHelper::BlitHelper(const Instance& instance, TaskScheduler& scheduler) + : scheduler{scheduler}, device{instance.GetDevice()} { + constexpr std::string_view cs_source = R"( +#version 450 core +#extension GL_EXT_samplerless_texture_functions : require +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +layout(set = 0, binding = 0) uniform highp texture2D depth; +layout(set = 0, binding = 1) uniform lowp utexture2D stencil; +layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color; + +layout(push_constant, std140) uniform ComputeInfo { + mediump ivec2 src_offset; +}; + +void main() { + ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); + ivec2 tex_coord = src_offset + dst_coord; + + highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + highp uint value = stencil_val | (depth_val << 8); + imageStore(color, dst_coord, uvec4(value)); +} + +)"; + compute_shader = + Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); + + const std::array compute_layout_bindings = { + vk::DescriptorSetLayoutBinding{.binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = 
vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}, + vk::DescriptorSetLayoutBinding{.binding = 2, + .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute}}; + + const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { + .bindingCount = static_cast(compute_layout_bindings.size()), + .pBindings = compute_layout_bindings.data()}; + + descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); + + const std::array update_template_entries = { + vk::DescriptorUpdateTemplateEntry{.dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo)}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = sizeof(vk::DescriptorImageInfo), + .stride = 0}, + vk::DescriptorUpdateTemplateEntry{.dstBinding = 2, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .offset = 2 * sizeof(vk::DescriptorImageInfo), + .stride = 0}}; + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = descriptor_layout}; + + update_template = device.createDescriptorUpdateTemplate(template_info); + + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Common::Vec2i), + }; + + const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1, + .pSetLayouts = 
&descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range}; + + compute_pipeline_layout = device.createPipelineLayout(layout_info); + + const vk::PipelineShaderStageCreateInfo compute_stage = { + .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"}; + + const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage, + .layout = compute_pipeline_layout}; + + if (const auto result = device.createComputePipeline({}, compute_info); + result.result == vk::Result::eSuccess) { + compute_pipeline = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); + UNREACHABLE(); + } +} + +BlitHelper::~BlitHelper() { + device.destroyPipeline(compute_pipeline); + device.destroyPipelineLayout(compute_pipeline_layout); + device.destroyDescriptorUpdateTemplate(update_template); + device.destroyDescriptorSetLayout(descriptor_layout); + device.destroyShaderModule(compute_shader); +} + +void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels); + dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels); + + const std::array textures = { + vk::DescriptorImageInfo{.imageView = source.GetDepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = source.GetStencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal}, + vk::DescriptorImageInfo{.imageView = dest.GetImageView(), + .imageLayout = vk::ImageLayout::eGeneral}}; + + const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool = + scheduler.GetDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = &descriptor_layout}; + + descriptor_set = device.allocateDescriptorSets(alloc_info)[0]; + + device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); + + 
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, + 1, &descriptor_set, 0, nullptr); + command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); + + const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); + command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(Common::Vec2i), src_offset.AsArray()); + + command_buffer.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h new file mode 100644 index 000000000..0aed257b9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -0,0 +1,39 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +struct TextureBlit; +} + +namespace Vulkan { + +class Instance; +class TaskScheduler; +class Surface; + +class BlitHelper { +public: + BlitHelper(const Instance& instance, TaskScheduler& scheduler); + ~BlitHelper(); + + /// Blits D24S8 pixel data to the provided buffer + void BlitD24S8ToR32(Surface& depth_surface, Surface& r32_surface, + const VideoCore::TextureBlit& blit); + +private: + TaskScheduler& scheduler; + vk::Device device; + vk::Pipeline compute_pipeline; + vk::PipelineLayout compute_pipeline_layout; + vk::DescriptorSetLayout descriptor_layout; + vk::DescriptorSet descriptor_set; + vk::DescriptorUpdateTemplate update_template; + vk::ShaderModule compute_shader; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp index 7863a527a..08127b933 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -124,10 +124,8 @@ D24S8toRGBA8::~D24S8toRGBA8() { void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest, VideoCore::Rect2D dst_rect) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - runtime.Transition(command_buffer, source.alloc, vk::ImageLayout::eDepthStencilReadOnlyOptimal, - 0, source.alloc.levels); - runtime.Transition(command_buffer, dest.alloc, vk::ImageLayout::eGeneral, 0, dest.alloc.levels); + source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels); + dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels); const std::array textures = { vk::DescriptorImageInfo{.imageView = source.GetDepthView(), @@ -145,6 +143,8 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf descriptor_set = device.allocateDescriptorSets(alloc_info)[0]; 
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, 1, &descriptor_set, 0, nullptr); command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); diff --git a/src/video_core/renderer_vulkan/vk_layout_tracker.h b/src/video_core/renderer_vulkan/vk_layout_tracker.h new file mode 100644 index 000000000..ad42fb996 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_layout_tracker.h @@ -0,0 +1,80 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class LayoutTracker { + static constexpr u32 LAYOUT_BITS = 3; + static constexpr u32 MAX_LAYOUTS = (1 << LAYOUT_BITS); + static constexpr u32 LAYOUT_MASK = MAX_LAYOUTS - 1; + + // Build layout pattern masks at compile time for fast range equality checks + static constexpr auto LAYOUT_PATTERNS = []() { + std::array patterns{}; + for (u32 layout = 0; layout < MAX_LAYOUTS; layout++) { + for (u32 i = 0; i < 16; i++) { + patterns[layout] <<= LAYOUT_BITS; + patterns[layout] |= layout; + } + } + + return patterns; + }(); + +public: + LayoutTracker() = default; + + /// Returns the image layout of the provided level + [[nodiscard]] constexpr vk::ImageLayout GetLayout(u32 level) const { + const u32 shift = level * LAYOUT_BITS; + return static_cast((layouts >> shift) & LAYOUT_MASK); + } + + /// Returns true if every level in the provided level range has the given layout + [[nodiscard]] constexpr bool IsRangeEqual(vk::ImageLayout layout, u32 level, + u32 level_count) const { + const u32 shift = level * LAYOUT_BITS; + const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1; + const u64 pattern = LAYOUT_PATTERNS[static_cast(layout)]; + return 
((layouts >> shift) & range_mask) == (pattern & range_mask); + } + + /// Sets the image layout of level_count levels starting at the provided level + constexpr void SetLayout(vk::ImageLayout layout, u32 level, u32 level_count = 1) { + const u32 shift = level * LAYOUT_BITS; + const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1; + const u64 pattern = LAYOUT_PATTERNS[static_cast(layout)]; + layouts &= ~(range_mask << shift); + layouts |= (pattern & range_mask) << shift; + } + + /// Calls func for each contiguous range of levels whose layout differs from new_layout + template + void ForEachLayoutRange(u32 level, u32 level_count, vk::ImageLayout new_layout, T&& func) { + u32 start_level = level; + u32 end_level = level + level_count; + auto current_layout = GetLayout(level); + + while (level < end_level) { + level++; + const auto layout = GetLayout(level); + if (layout != current_layout || level == end_level) { + if (current_layout != new_layout) { + func(start_level, level - start_level, current_layout); + } + current_layout = layout; + start_level = level; + } + } + } + +public: + u64 layouts{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b5425c118..bbc7b7511 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -105,12 +105,26 @@ constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat}; constexpr std::array TEXTURE_BUFFER_FORMATS = {vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32A32Sfloat}; +constexpr VideoCore::SurfaceParams NULL_PARAMS = {.width = 1, + .height = 1, + .stride = 1, + .texture_type = VideoCore::TextureType::Texture2D, + .pixel_format = VideoCore::PixelFormat::RGBA8, + .type = VideoCore::SurfaceType::Color}; + +constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst; +constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = 
NULL_USAGE | vk::ImageUsageFlagBits::eStorage; + RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime, RenderpassCache& renderpass_cache) : instance{instance}, scheduler{scheduler}, runtime{runtime}, renderpass_cache{renderpass_cache}, res_cache{*this, runtime}, pipeline_cache{instance, scheduler, renderpass_cache}, + null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime}, + null_storage_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Uint, NULL_STORAGE_USAGE, runtime}, vertex_buffer{ instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, uniform_buffer{ @@ -122,8 +136,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { - // Create a 1x1 clear texture to use in the NULL case, - CreateDefaultTextures(); + null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1); uniform_block_data.lighting_lut_dirty.fill(true); @@ -156,12 +170,12 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1)); for (u32 i = 0; i < 4; i++) { - pipeline_cache.BindTexture(i, default_texture.image_view); + pipeline_cache.BindTexture(i, null_surface.GetImageView()); pipeline_cache.BindSampler(i, default_sampler); } for (u32 i = 0; i < 7; i++) { - pipeline_cache.BindStorageImage(i, default_storage_texture.image_view); + pipeline_cache.BindStorageImage(i, null_storage_surface.GetImageView()); } // Explicitly call the derived version to avoid warnings about calling virtual @@ -173,7 +187,6 @@ RasterizerVulkan::~RasterizerVulkan() { renderpass_cache.ExitRenderpass(); scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown); - 
VmaAllocator allocator = instance.GetAllocator(); vk::Device device = instance.GetDevice(); for (auto& [key, sampler] : samplers) { @@ -184,10 +197,6 @@ RasterizerVulkan::~RasterizerVulkan() { device.destroyFramebuffer(framebuffer); } - vmaDestroyImage(allocator, default_texture.image, default_texture.allocation); - vmaDestroyImage(allocator, default_storage_texture.image, default_storage_texture.allocation); - device.destroyImageView(default_texture.image_view); - device.destroyImageView(default_storage_texture.image_view); device.destroySampler(default_sampler); } @@ -672,9 +681,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { const u32 binding = static_cast(face); if (surface != nullptr) { - pipeline_cache.BindStorageImage(binding, surface->alloc.image_view); + pipeline_cache.BindStorageImage(binding, surface->GetImageView()); } else { - pipeline_cache.BindStorageImage(binding, default_storage_texture.image_view); + pipeline_cache.BindStorageImage(binding, null_storage_surface.GetImageView()); } }; @@ -718,7 +727,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { if (surface != nullptr) { pipeline_cache.BindStorageImage(0, surface->GetImageView()); } else { - pipeline_cache.BindStorageImage(0, default_storage_texture.image_view); + pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView()); } continue; } @@ -748,12 +757,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { auto surface = res_cache.GetTextureCube(config); if (surface != nullptr) { - runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc, - vk::ImageLayout::eShaderReadOnlyOptimal, 0, - surface->alloc.levels, 0, 6); - pipeline_cache.BindTexture(3, surface->alloc.image_view); + surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, + surface->alloc.levels); + pipeline_cache.BindTexture(3, surface->GetImageView()); } else { - pipeline_cache.BindTexture(3, default_texture.image_view); + 
pipeline_cache.BindTexture(3, null_surface.GetImageView()); } BindSampler(3, texture_cube_sampler, texture.config); @@ -769,9 +777,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc, - vk::ImageLayout::eShaderReadOnlyOptimal, 0, - surface->alloc.levels); + surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, + surface->alloc.levels); CheckBarrier(surface->alloc.image_view, texture_index); } else { // Can occur when texture addr is null or its memory is unmapped/invalid @@ -781,10 +788,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // the geometry in question. // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn // on the male character's face, which in the OpenGL default appear black. - pipeline_cache.BindTexture(texture_index, default_texture.image_view); + pipeline_cache.BindTexture(texture_index, null_surface.GetImageView()); } } else { - pipeline_cache.BindTexture(texture_index, default_texture.image_view); + pipeline_cache.BindTexture(texture_index, null_surface.GetImageView()); pipeline_cache.BindSampler(texture_index, default_sampler); } } @@ -830,17 +837,12 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { it->second = CreateFramebuffer(framebuffer_info); } - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); if (color_surface) { - runtime.Transition(command_buffer, color_surface->alloc, - vk::ImageLayout::eColorAttachmentOptimal, 0, - color_surface->alloc.levels); + color_surface->Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1); } if (depth_surface) { - runtime.Transition(command_buffer, depth_surface->alloc, - vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, - depth_surface->alloc.levels); + depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); } const 
vk::RenderPassBeginInfo renderpass_begin = { @@ -866,6 +868,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); const u32 batch_size = static_cast(vertex_batch.size()); for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { @@ -877,12 +881,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); vertex_buffer.Commit(vertex_size); - // Bind the vertex buffer at the current mapped offset. This effectively means - // that when base_vertex is zero the GPU will start drawing from the current mapped - // offset not the start of the buffer. - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); - command_buffer.draw(vertices, 1, base_vertex, 0); } } @@ -1633,17 +1632,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info) return device.createFramebuffer(framebuffer_info); } -void RasterizerVulkan::CreateDefaultTextures() { - const vk::ImageUsageFlags usage = GetImageUsage(vk::ImageAspectFlagBits::eColor); - default_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Unorm, usage, {}); - default_storage_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Uint, usage, {}); - - const vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - runtime.Transition(command_buffer, default_texture, vk::ImageLayout::eShaderReadOnlyOptimal, 0, - 1); - runtime.Transition(command_buffer, default_storage_texture, vk::ImageLayout::eGeneral, 0, 1); -} - void RasterizerVulkan::FlushBuffers() { vertex_buffer.Flush(); uniform_buffer.Flush(); diff --git 
a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index e71254774..3703cfc93 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -249,8 +249,6 @@ private: /// Creates a new Vulkan framebuffer object vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info); - void CreateDefaultTextures(); - private: const Instance& instance; TaskScheduler& scheduler; @@ -280,8 +278,8 @@ private: std::vector vertex_batch; std::array binding_offsets{}; vk::Sampler default_sampler; - ImageAlloc default_texture; - ImageAlloc default_storage_texture; + Surface null_surface; + Surface null_storage_surface; struct { Pica::Shader::UniformData data{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index ad9fb5010..4464f8ac3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -35,7 +35,9 @@ constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024; TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache) - : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{ + instance, + scheduler} { for (auto& buffer : staging_buffers) { buffer = std::make_unique(instance, STAGING_BUFFER_SIZE, @@ -77,7 +79,10 @@ TextureRuntime::~TextureRuntime() { StagingData TextureRuntime::FindStaging(u32 size, bool upload) { const u32 current_slot = scheduler.GetCurrentSlotIndex(); - const u32 offset = staging_offsets[current_slot]; + u32& offset = staging_offsets[current_slot]; + // Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone + offset = Common::AlignUp(offset, 4); + if (offset + size > STAGING_BUFFER_SIZE) { LOG_CRITICAL(Render_Vulkan, "Staging 
buffer size exceeded!"); UNREACHABLE(); @@ -100,19 +105,6 @@ void TextureRuntime::OnSlotSwitch(u32 new_slot) { ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { - const u32 levels = std::bit_width(std::max(width, height)); - const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; - - const VideoCore::HostTextureTag key = { - .format = format, .width = width, .height = height, .layers = layers}; - - // Attempt to recycle an unused allocation - if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { - ImageAlloc alloc = std::move(it->second); - texture_recycler.erase(it); - return alloc; - } - const FormatTraits traits = instance.GetTraits(format); const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format)); @@ -121,23 +113,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma const vk::Format vk_format = is_suitable ? traits.native : traits.fallback; const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect); + return Allocate(width, height, type, vk_format, vk_usage); +} + +ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::TextureType type, + vk::Format format, vk::ImageUsageFlags usage) { + ImageAlloc alloc{}; + alloc.format = format; + alloc.levels = std::bit_width(std::max(width, height)); + alloc.layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; + alloc.aspect = GetImageAspect(format); + + const HostTextureTag key = {.format = format, .type = type, .width = width, .height = height}; + + // Attempt to recycle an unused allocation + if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { + ImageAlloc alloc = std::move(it->second); + texture_recycler.erase(it); + return alloc; + } + const vk::ImageCreateFlags flags = type == VideoCore::TextureType::CubeMap ? 
vk::ImageCreateFlagBits::eCubeCompatible : vk::ImageCreateFlags{}; - return Allocate(width, height, layers, levels, vk_format, vk_usage, flags); -} - -ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 levels, - vk::Format format, vk::ImageUsageFlags usage, - vk::ImageCreateFlags flags) { - const vk::ImageAspectFlags aspect = GetImageAspect(format); const vk::ImageCreateInfo image_info = {.flags = flags, .imageType = vk::ImageType::e2D, .format = format, .extent = {width, height, 1}, - .mipLevels = levels, - .arrayLayers = layers, + .mipLevels = alloc.levels, + .arrayLayers = alloc.layers, .samples = vk::SampleCountFlagBits::e1, .usage = usage}; @@ -145,79 +150,66 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 level VkImage unsafe_image{}; VkImageCreateInfo unsafe_image_info = static_cast(image_info); - VmaAllocation allocation; VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, - &unsafe_image, &allocation, nullptr); + &unsafe_image, &alloc.allocation, nullptr); if (result != VK_SUCCESS) [[unlikely]] { LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); UNREACHABLE(); } - const vk::ImageViewType view_type = flags & vk::ImageCreateFlagBits::eCubeCompatible - ? vk::ImageViewType::eCube - : vk::ImageViewType::e2D; + const vk::ImageViewType view_type = + type == VideoCore::TextureType::CubeMap ? 
vk::ImageViewType::eCube : vk::ImageViewType::e2D; - vk::Image image = vk::Image{unsafe_image}; - const vk::ImageViewCreateInfo view_info = {.image = image, + alloc.image = vk::Image{unsafe_image}; + const vk::ImageViewCreateInfo view_info = {.image = alloc.image, .viewType = view_type, .format = format, - .subresourceRange = {.aspectMask = aspect, + .subresourceRange = {.aspectMask = alloc.aspect, .baseMipLevel = 0, - .levelCount = levels, + .levelCount = alloc.levels, .baseArrayLayer = 0, - .layerCount = layers}}; + .layerCount = alloc.layers}}; vk::Device device = instance.GetDevice(); - vk::ImageView image_view = device.createImageView(view_info); + alloc.image_view = device.createImageView(view_info); // Also create a base mip view in case this is used as an attachment - vk::ImageView base_view; - if (levels > 1) [[likely]] { - const vk::ImageViewCreateInfo base_view_info = {.image = image, - .viewType = view_type, - .format = format, - .subresourceRange = {.aspectMask = aspect, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = layers}}; + if (alloc.levels > 1) [[likely]] { + const vk::ImageViewCreateInfo base_view_info = { + .image = alloc.image, + .viewType = view_type, + .format = format, + .subresourceRange = {.aspectMask = alloc.aspect, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = alloc.layers}}; - base_view = device.createImageView(base_view_info); + alloc.base_view = device.createImageView(base_view_info); } // Create seperate depth/stencil views in case this gets reinterpreted with a compute shader - vk::ImageView depth_view; - vk::ImageView stencil_view; - if (aspect & vk::ImageAspectFlagBits::eStencil) { + if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { vk::ImageViewCreateInfo view_info = { - .image = image, + .image = alloc.image, .viewType = view_type, .format = format, .subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eDepth, .baseMipLevel = 0, - .levelCount = levels, 
+ .levelCount = alloc.levels, .baseArrayLayer = 0, - .layerCount = layers}}; + .layerCount = alloc.layers}}; - depth_view = device.createImageView(view_info); + alloc.depth_view = device.createImageView(view_info); view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil; - stencil_view = device.createImageView(view_info); + alloc.stencil_view = device.createImageView(view_info); } - return ImageAlloc{.image = image, - .image_view = image_view, - .base_view = base_view, - .depth_view = depth_view, - .stencil_view = stencil_view, - .allocation = allocation, - .format = format, - .aspect = aspect, - .levels = levels, - .layers = layers}; + return alloc; } -void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) { +void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) { texture_recycler.emplace(tag, std::move(alloc)); } @@ -239,6 +231,8 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa } } else { switch (surface.pixel_format) { + case VideoCore::PixelFormat::RGBA8: + return Pica::Texture::ConvertABGRToRGBA(source, dest); case VideoCore::PixelFormat::RGBA4: return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest); case VideoCore::PixelFormat::RGB8: @@ -258,10 +252,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea const vk::ImageAspectFlags aspect = ToVkAspect(surface.type); renderpass_cache.ExitRenderpass(); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, 0, - surface.alloc.levels, 0, - surface.texture_type == VideoCore::TextureType::CubeMap ? 
6 : 1); + surface.Transition(vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1); vk::ClearValue clear_value{}; if (aspect & vk::ImageAspectFlagBits::eColor) { @@ -282,6 +273,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea .baseArrayLayer = 0, .layerCount = 1}; + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); if (aspect & vk::ImageAspectFlagBits::eColor) { command_buffer.clearColorImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, clear_value.color, @@ -295,24 +287,22 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea } else { // For partial clears we begin a clear renderpass with the appropriate render area vk::RenderPass clear_renderpass{}; - ImageAlloc& alloc = surface.alloc; if (aspect & vk::ImageAspectFlagBits::eColor) { clear_renderpass = renderpass_cache.GetRenderpass( surface.pixel_format, VideoCore::PixelFormat::Invalid, true); - Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0, - alloc.levels); + surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1); } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) { clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, surface.pixel_format, true); - Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, - alloc.levels); + surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); } + const vk::ImageView framebuffer_view = surface.GetFramebufferView(); + auto [it, new_framebuffer] = - clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{}); + clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{}); if (new_framebuffer) { - const vk::ImageView framebuffer_view = surface.GetFramebufferView(); const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass, .attachmentCount = 1, .pAttachments = 
&framebuffer_view, @@ -345,6 +335,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { renderpass_cache.ExitRenderpass(); + source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); + dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); + const vk::ImageCopy image_copy = { .srcSubresource = {.aspectMask = ToVkAspect(source.type), .mipLevel = copy.src_level, @@ -359,11 +352,6 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, .extent = {copy.extent.width, copy.extent.height, 1}}; vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, - source.alloc.levels); - Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0, - dest.alloc.levels); - command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy); @@ -374,12 +362,8 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { renderpass_cache.ExitRenderpass(); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, - source.alloc.levels, 0, - source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); - Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0, - dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 
6 : 1); + source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); + dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); const std::array source_offsets = {vk::Offset3D{static_cast(blit.src_rect.left), static_cast(blit.src_rect.bottom), 0}, @@ -402,6 +386,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, .layerCount = 1}, .dstOffsets = dest_offsets}; + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); @@ -420,8 +405,8 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { vk::ImageAspectFlags aspect = ToVkAspect(surface.type); vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); for (u32 i = 1; i < levels; i++) { - Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); - Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1); + surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); + surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1); const std::array source_offsets = {vk::Offset3D{0, 0, 0}, vk::Offset3D{current_width, current_height, 1}}; @@ -461,9 +446,9 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const { } void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, - vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer, - u32 layer_count) { - if (new_layout == alloc.layout || !alloc.image) { + vk::ImageLayout new_layout, u32 level, u32 level_count) { + LayoutTracker& tracker = alloc.tracker; + if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) { return; } @@ -540,28 +525,33 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al return info; }; - LayoutInfo source = 
GetLayoutInfo(alloc.layout); LayoutInfo dest = GetLayoutInfo(new_layout); + tracker.ForEachLayoutRange( + level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) { + LayoutInfo source = GetLayoutInfo(old_layout); + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = source.access, + .dstAccessMask = dest.access, + .oldLayout = old_layout, + .newLayout = new_layout, + .image = alloc.image, + .subresourceRange = {.aspectMask = alloc.aspect, + .baseMipLevel = start, + .levelCount = count, + .baseArrayLayer = 0, + .layerCount = alloc.layers}}; - const vk::ImageMemoryBarrier barrier = { - .srcAccessMask = source.access, - .dstAccessMask = dest.access, - .oldLayout = alloc.layout, - .newLayout = new_layout, - .image = alloc.image, - .subresourceRange = {.aspectMask = alloc.aspect, - .baseMipLevel = /*level*/ 0, - .levelCount = /*level_count*/ alloc.levels, - .baseArrayLayer = layer, - .layerCount = layer_count}}; + command_buffer.pipelineBarrier(source.stage, dest.stage, + vk::DependencyFlagBits::eByRegion, {}, {}, barrier); + }); - command_buffer.pipelineBarrier(source.stage, dest.stage, vk::DependencyFlagBits::eByRegion, {}, - {}, barrier); - - alloc.layout = new_layout; + tracker.SetLayout(new_layout, level, level_count); + for (u32 i = 0; i < level_count; i++) { + ASSERT(alloc.tracker.GetLayout(level + i) == new_layout); + } } -Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) +Surface::Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime) : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} { @@ -571,36 +561,52 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) } } +Surface::Surface(const VideoCore::SurfaceParams& params, vk::Format format, + vk::ImageUsageFlags usage, TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime}, 
instance{runtime.GetInstance()}, + scheduler{runtime.GetScheduler()} { + if (format != vk::Format::eUndefined) { + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), texture_type, format, usage); + } +} + Surface::~Surface() { if (pixel_format != VideoCore::PixelFormat::Invalid) { - const VideoCore::HostTextureTag tag = { - .format = pixel_format, - .width = GetScaledWidth(), - .height = GetScaledHeight(), - .layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u}; + const HostTextureTag tag = {.format = alloc.format, + .type = texture_type, + .width = GetScaledWidth(), + .height = GetScaledHeight()}; runtime.Recycle(tag, std::move(alloc)); } } +void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + runtime.Transition(command_buffer, alloc, new_layout, level, level_count); +} + MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { MICROPROFILE_SCOPE(Vulkan_Upload); + if (type == VideoCore::SurfaceType::DepthStencil) { + LOG_ERROR(Render_Vulkan, "Depth upload unimplemented, ignoring"); + return; + } + runtime.renderpass_cache.ExitRenderpass(); const bool is_scaled = res_scale != 1; if (is_scaled) { - LOG_ERROR(Render_Vulkan, "Unimplemented scaled upload!"); - ScaledUpload(upload); + ScaledUpload(upload, staging); } else { u32 region_count = 0; std::array copy_regions; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); const VideoCore::Rect2D rect = upload.texture_rect; vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset, + .bufferOffset = staging.buffer_offset + upload.buffer_offset, .bufferRowLength = rect.GetWidth(), .bufferImageHeight = rect.GetHeight(), .imageSubresource = {.aspectMask = alloc.aspect, @@ -623,9 +629,9 @@ void Surface::Upload(const 
VideoCore::BufferTextureCopy& upload, const StagingDa } } - runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, - alloc.levels, 0, - texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); + Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.copyBufferToImage(staging.buffer, alloc.image, vk::ImageLayout::eTransferDstOptimal, region_count, copy_regions.data()); @@ -644,17 +650,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi runtime.renderpass_cache.ExitRenderpass(); + // For depth stencil downloads always use the compute shader fallback + // to avoid having the interleave the data later. These should(?) be + // uncommon anyways and the perf hit is very small + if (type == VideoCore::SurfaceType::DepthStencil) { + return DepthStencilDownload(download, staging); + } + const bool is_scaled = res_scale != 1; if (is_scaled) { - LOG_ERROR(Render_Vulkan, "Unimplemented scaled download!"); - ScaledDownload(download); + ScaledDownload(download, staging); } else { - u32 region_count = 0; - std::array copy_regions; - - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); const VideoCore::Rect2D rect = download.texture_rect; - vk::BufferImageCopy copy_region = { + const vk::BufferImageCopy copy_region = { .bufferOffset = staging.buffer_offset + download.buffer_offset, .bufferRowLength = rect.GetWidth(), .bufferImageHeight = rect.GetHeight(), @@ -665,25 +673,12 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; - if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { - copy_regions[region_count++] = copy_region; - } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) { - copy_region.imageSubresource.aspectMask = 
vk::ImageAspectFlagBits::eDepth; - copy_regions[region_count++] = copy_region; - - if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { - copy_region.bufferOffset += 4 * staging.size / 5; - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - copy_regions[region_count++] = copy_region; - } - } - - runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, 0, - alloc.levels); + Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); // Copy pixel data to the staging buffer + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, - staging.buffer, region_count, copy_regions.data()); + staging.buffer, copy_region); } // Lock this data until the next scheduler switch @@ -692,66 +687,127 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi } u32 Surface::GetInternalBytesPerPixel() const { + // Request 5 bytes for D24S8 as well because we can use the + // extra space when deinterleaving the data during upload + if (alloc.format == vk::Format::eD24UnormS8Uint) { + return 5; + } + return vk::blockSize(alloc.format); } -void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { - /*const u32 rect_width = download.texture_rect.GetWidth(); - const u32 rect_height = download.texture_rect.GetHeight(); - - // Allocate an unscaled texture that fits the download rectangle to use as a blit destination - const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, - VideoCore::TextureType::Texture2D); - runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); - runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, - texture); - - // Blit the scaled rectangle to the unscaled texture - const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; - 
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top, - 0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR); - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); - - const auto& tuple = runtime.GetFormatTuple(pixel_format); - if (driver.IsOpenGLES()) { - const auto& downloader_es = runtime.GetDownloaderES(); - downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect_height, rect_width, - reinterpret_cast(download.buffer_offset)); - } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - reinterpret_cast(download.buffer_offset)); - }*/ -} - -void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { - /*const u32 rect_width = upload.texture_rect.GetWidth(); +void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { + const u32 rect_width = upload.texture_rect.GetWidth(); const u32 rect_height = upload.texture_rect.GetHeight(); - - OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, - VideoCore::TextureType::Texture2D); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); - - glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, - tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); - const auto scaled_rect = upload.texture_rect * res_scale; const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; - const auto& filterer = runtime.GetFilterer(); - if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) { - runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); - runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, - texture); - // If filtering fails, resort to normal blitting - glBlitFramebuffer(0, 0, rect_width, rect_height, - upload.texture_rect.left, upload.texture_rect.bottom, - 
upload.texture_rect.right, upload.texture_rect.top, - MakeBufferMask(type), GL_LINEAR); - }*/ + SurfaceParams unscaled_params = *this; + unscaled_params.width = rect_width; + unscaled_params.stride = rect_width; + unscaled_params.height = rect_height; + unscaled_params.res_scale = 1; + Surface unscaled_surface{unscaled_params, runtime}; + + const VideoCore::BufferTextureCopy unscaled_upload = {.buffer_offset = upload.buffer_offset, + .buffer_size = upload.buffer_size, + .texture_rect = unscaled_rect}; + + unscaled_surface.Upload(unscaled_upload, staging); + + const VideoCore::TextureBlit blit = {.src_level = 0, + .dst_level = upload.texture_level, + .src_layer = 0, + .dst_layer = 0, + .src_rect = unscaled_rect, + .dst_rect = scaled_rect}; + + runtime.BlitTextures(unscaled_surface, *this, blit); +} + +void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, + const StagingData& staging) { + const u32 rect_width = download.texture_rect.GetWidth(); + const u32 rect_height = download.texture_rect.GetHeight(); + const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; + const VideoCore::Rect2D unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; + + // Allocate an unscaled texture that fits the download rectangle to use as a blit destination + SurfaceParams unscaled_params = *this; + unscaled_params.width = rect_width; + unscaled_params.stride = rect_width; + unscaled_params.height = rect_height; + unscaled_params.res_scale = 1; + Surface unscaled_surface{unscaled_params, runtime}; + + const VideoCore::TextureBlit blit = {.src_level = download.texture_level, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_rect = scaled_rect, + .dst_rect = unscaled_rect}; + + // Blit the scaled rectangle to the unscaled texture + runtime.BlitTextures(*this, unscaled_surface, blit); + + const VideoCore::BufferTextureCopy unscaled_download = {.buffer_offset = download.buffer_offset, + .buffer_size = download.buffer_size, + 
.texture_rect = unscaled_rect, + .texture_level = 0}; + + unscaled_surface.Download(unscaled_download, staging); +} + +void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download, + const StagingData& staging) { + const u32 rect_width = download.texture_rect.GetWidth(); + const u32 rect_height = download.texture_rect.GetHeight(); + const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; + const VideoCore::Rect2D unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; + const VideoCore::Rect2D r32_scaled_rect = + VideoCore::Rect2D{0, scaled_rect.GetHeight(), scaled_rect.GetWidth(), 0}; + + // For depth downloads create an R32UI surface and use a compute shader for convert. + // Then we blit and download that surface + SurfaceParams r32_params = *this; + r32_params.width = scaled_rect.GetWidth(); + r32_params.stride = scaled_rect.GetWidth(); + r32_params.height = scaled_rect.GetHeight(); + r32_params.type = VideoCore::SurfaceType::Color; + r32_params.res_scale = 1; + Surface r32_surface{r32_params, vk::Format::eR32Uint, + vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eStorage, + runtime}; + + const VideoCore::TextureBlit blit = {.src_level = download.texture_level, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_rect = scaled_rect, + .dst_rect = r32_scaled_rect}; + + runtime.blit_helper.BlitD24S8ToR32(*this, r32_surface, blit); + + // Blit the upper mip level to the lower one to scale without additional allocations + const bool is_scaled = res_scale != 1; + if (is_scaled) { + const VideoCore::TextureBlit r32_blit = {.src_level = 0, + .dst_level = 1, + .src_layer = 0, + .dst_layer = 0, + .src_rect = r32_scaled_rect, + .dst_rect = unscaled_rect}; + + runtime.BlitTextures(r32_surface, r32_surface, r32_blit); + } + + const VideoCore::BufferTextureCopy r32_download = {.buffer_offset = download.buffer_offset, + .buffer_size = download.buffer_size, + 
.texture_rect = unscaled_rect, + .texture_level = is_scaled ? 1u : 0u}; + + r32_surface.Download(r32_download, staging); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 2d8691e7d..4b1e9b577 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -9,8 +9,10 @@ #include #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/renderer_vulkan/vk_blit_helper.h" #include "video_core/renderer_vulkan/vk_format_reinterpreter.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_layout_tracker.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" @@ -24,6 +26,14 @@ struct StagingData { }; struct ImageAlloc { + ImageAlloc() = default; + + ImageAlloc(const ImageAlloc&) = delete; + ImageAlloc& operator=(const ImageAlloc&) = delete; + + ImageAlloc(ImageAlloc&&) = default; + ImageAlloc& operator=(ImageAlloc&&) = default; + vk::Image image; vk::ImageView image_view; vk::ImageView base_view; @@ -32,12 +42,38 @@ struct ImageAlloc { VmaAllocation allocation; vk::ImageUsageFlags usage; vk::Format format; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor; u32 levels = 1; u32 layers = 1; + LayoutTracker tracker; }; +struct HostTextureTag { + vk::Format format = vk::Format::eUndefined; + VideoCore::TextureType type = VideoCore::TextureType::Texture2D; + u32 width = 1; + u32 height = 1; + + auto operator<=>(const HostTextureTag&) const noexcept = default; + + const u64 Hash() const { + return Common::ComputeHash64(this, sizeof(HostTextureTag)); + } +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + std::size_t operator()(const 
Vulkan::HostTextureTag& tag) const noexcept { + return tag.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Instance; class RenderpassCache; class Surface; @@ -62,15 +98,14 @@ public: VideoCore::TextureType type); /// Allocates a vulkan image - [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, u32 layers, u32 levels, - vk::Format format, vk::ImageUsageFlags usage, - vk::ImageCreateFlags flags); + [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::TextureType type, + vk::Format format, vk::ImageUsageFlags usage); /// Causes a GPU command flush void Finish(); /// Takes back ownership of the allocation for recycling - void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc); + void Recycle(const HostTextureTag tag, ImageAlloc&& alloc); /// Performs required format convertions on the staging data void FormatConvert(const Surface& surface, bool upload, std::span source, @@ -78,7 +113,7 @@ public: /// Transitions the mip level range of the surface to new_layout void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout, - u32 level, u32 level_count, u32 layer = 0, u32 layer_count = 1); + u32 level, u32 level_count); /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, @@ -118,10 +153,11 @@ private: const Instance& instance; TaskScheduler& scheduler; RenderpassCache& renderpass_cache; + BlitHelper blit_helper; std::array reinterpreters; std::array, SCHEDULER_COMMAND_COUNT> staging_buffers; std::array staging_offsets{}; - std::unordered_multimap texture_recycler; + std::unordered_multimap texture_recycler; std::unordered_map clear_framebuffers; }; @@ -130,9 +166,14 @@ class Surface : public VideoCore::SurfaceBase { friend class RasterizerVulkan; public: - Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); + Surface(const VideoCore::SurfaceParams& params, TextureRuntime& 
runtime); + Surface(const VideoCore::SurfaceParams& params, vk::Format format, vk::ImageUsageFlags usage, + TextureRuntime& runtime); ~Surface() override; + /// Transitions the mip level range of the surface to new_layout + void Transition(vk::ImageLayout new_layout, u32 level, u32 level_count); + /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); @@ -168,14 +209,15 @@ public: } private: - /// Downloads scaled image by downscaling the requested rectangle - void ScaledDownload(const VideoCore::BufferTextureCopy& download); - /// Uploads pixel data to scaled texture - void ScaledUpload(const VideoCore::BufferTextureCopy& upload); + void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); - /// Overrides the image layout of the mip level range - void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); + /// Downloads scaled image by downscaling the requested rectangle + void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& stagings); + + /// Downloads scaled depth stencil data + void DepthStencilDownload(const VideoCore::BufferTextureCopy& download, + const StagingData& staging); private: TextureRuntime& runtime; @@ -183,7 +225,7 @@ private: TaskScheduler& scheduler; public: - ImageAlloc alloc{}; + ImageAlloc alloc; FormatTraits traits; };