renderer_vulkan: Implement scaled uploads and downloads

* This commit includes large changes to how textures are handled. Surface is now used instead of ImageAlloc, which provides multiple benefits: automatic recycling on destruction and the ability to use the TextureRuntime interface to simplify operations

* Layout tracking is also implemented which allows transitioning of individual mip levels without errors

* This fixes graphical errors in multiple games which relied on framebuffer uploads
This commit is contained in:
emufan4568
2022-10-16 08:17:05 +03:00
parent caf596e5eb
commit 69db7d9d0d
12 changed files with 648 additions and 287 deletions

View File

@@ -84,10 +84,13 @@ add_library(video_core STATIC
renderer_vulkan/pica_to_vk.h renderer_vulkan/pica_to_vk.h
renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_blit_helper.cpp
renderer_vulkan/vk_blit_helper.h
renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h renderer_vulkan/vk_common.h
renderer_vulkan/vk_format_reinterpreter.cpp renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_layout_tracker.h
renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.cpp

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
#include <unordered_map> #include <unordered_map>

View File

@@ -48,7 +48,7 @@ class SurfaceBase : public SurfaceParams, public std::enable_shared_from_this<S>
using Watcher = SurfaceWatcher<S>; using Watcher = SurfaceWatcher<S>;
public: public:
SurfaceBase(SurfaceParams& params) : SurfaceParams{params} {} SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {}
virtual ~SurfaceBase() = default; virtual ~SurfaceBase() = default;
/// Returns true when this surface can be used to fill the fill_interval of dest_surface /// Returns true when this surface can be used to fill the fill_interval of dest_surface

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <chrono>
#define GLM_FORCE_DEPTH_ZERO_TO_ONE #define GLM_FORCE_DEPTH_ZERO_TO_ONE
#include <glm/gtc/matrix_transform.hpp> #include <glm/gtc/matrix_transform.hpp>
#include "common/assert.h" #include "common/assert.h"
@@ -190,11 +189,9 @@ RendererVulkan::~RendererVulkan() {
} }
for (auto& info : screen_infos) { for (auto& info : screen_infos) {
const VideoCore::HostTextureTag tag = { const HostTextureTag tag = {.format = info.texture.alloc.format,
.format = VideoCore::PixelFormatFromGPUPixelFormat(info.texture.format), .width = info.texture.width,
.width = info.texture.width, .height = info.texture.height};
.height = info.texture.height,
.layers = 1};
runtime.Recycle(tag, std::move(info.texture.alloc)); runtime.Recycle(tag, std::move(info.texture.alloc));
} }
@@ -548,25 +545,21 @@ void RendererVulkan::BuildPipelines() {
void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture,
const GPU::Regs::FramebufferConfig& framebuffer) { const GPU::Regs::FramebufferConfig& framebuffer) {
TextureInfo old_texture = texture; TextureInfo old_texture = std::move(texture);
texture = TextureInfo{ texture = TextureInfo{.alloc = runtime.Allocate(
.alloc = framebuffer.width, framebuffer.height,
runtime.Allocate(framebuffer.width, framebuffer.height, VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format),
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format), VideoCore::TextureType::Texture2D),
VideoCore::TextureType::Texture2D), .width = framebuffer.width,
.width = framebuffer.width, .height = framebuffer.height,
.height = framebuffer.height, .format = framebuffer.color_format};
.format = framebuffer.color_format,
};
// Recycle the old texture after allocation to avoid having duplicates of the same allocation in // Recycle the old texture after allocation to avoid having duplicates of the same allocation in
// the recycler // the recycler
if (old_texture.width != 0 && old_texture.height != 0) { if (old_texture.width != 0 && old_texture.height != 0) {
const VideoCore::HostTextureTag tag = { const HostTextureTag tag = {.format = old_texture.alloc.format,
.format = VideoCore::PixelFormatFromGPUPixelFormat(old_texture.format), .width = old_texture.width,
.width = old_texture.width, .height = old_texture.height};
.height = old_texture.height,
.layers = 1};
runtime.Recycle(tag, std::move(old_texture.alloc)); runtime.Recycle(tag, std::move(old_texture.alloc));
} }

View File

@@ -0,0 +1,161 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/vector_math.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
/// Creates the compute shader, descriptor set layout, descriptor update template, pipeline layout
/// and compute pipeline used by BlitD24S8ToR32.
BlitHelper::BlitHelper(const Instance& instance, TaskScheduler& scheduler)
    : scheduler{scheduler}, device{instance.GetDevice()} {
    // The shader fetches one depth texel and one stencil texel per invocation, scales the depth
    // value to 24 bits and stores (depth << 8) | stencil into the r32ui destination image.
    constexpr std::string_view d24s8_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color;
layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset;
};
void main() {
ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy);
ivec2 tex_coord = src_offset + dst_coord;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uint value = stencil_val | (depth_val << 8);
imageStore(color, dst_coord, uvec4(value));
}
)";
    compute_shader =
        Compile(d24s8_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High);

    // Descriptor set layout: binding 0 = depth view, binding 1 = stencil view, binding 2 = output
    const auto make_binding = [](u32 slot, vk::DescriptorType type) {
        return vk::DescriptorSetLayoutBinding{.binding = slot,
                                              .descriptorType = type,
                                              .descriptorCount = 1,
                                              .stageFlags = vk::ShaderStageFlagBits::eCompute};
    };
    const std::array set_bindings = {make_binding(0, vk::DescriptorType::eSampledImage),
                                     make_binding(1, vk::DescriptorType::eSampledImage),
                                     make_binding(2, vk::DescriptorType::eStorageImage)};

    const vk::DescriptorSetLayoutCreateInfo set_layout_info = {
        .bindingCount = static_cast<u32>(set_bindings.size()), .pBindings = set_bindings.data()};
    descriptor_layout = device.createDescriptorSetLayout(set_layout_info);

    // The update template reads three consecutive vk::DescriptorImageInfo structures, one per
    // binding, so each entry starts one structure further into the source data.
    constexpr std::size_t info_size = sizeof(vk::DescriptorImageInfo);
    const auto make_entry = [](u32 slot, vk::DescriptorType type, std::size_t byte_offset,
                               std::size_t byte_stride) {
        return vk::DescriptorUpdateTemplateEntry{.dstBinding = slot,
                                                 .dstArrayElement = 0,
                                                 .descriptorCount = 1,
                                                 .descriptorType = type,
                                                 .offset = byte_offset,
                                                 .stride = byte_stride};
    };
    const std::array template_entries = {
        make_entry(0, vk::DescriptorType::eSampledImage, 0, info_size),
        make_entry(1, vk::DescriptorType::eSampledImage, info_size, 0),
        make_entry(2, vk::DescriptorType::eStorageImage, 2 * info_size, 0)};

    const vk::DescriptorUpdateTemplateCreateInfo update_template_info = {
        .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
        .pDescriptorUpdateEntries = template_entries.data(),
        .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
        .descriptorSetLayout = descriptor_layout};
    update_template = device.createDescriptorUpdateTemplate(update_template_info);

    // A single push constant range carries the source offset consumed by the shader
    const vk::PushConstantRange offset_range = {
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = sizeof(Common::Vec2i),
    };

    const vk::PipelineLayoutCreateInfo pipeline_layout_info = {
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &offset_range};
    compute_pipeline_layout = device.createPipelineLayout(pipeline_layout_info);

    const vk::PipelineShaderStageCreateInfo shader_stage = {
        .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"};

    const vk::ComputePipelineCreateInfo pipeline_info = {.stage = shader_stage,
                                                         .layout = compute_pipeline_layout};

    // Only keep the pipeline handle when creation reported success
    if (const auto created = device.createComputePipeline({}, pipeline_info);
        created.result != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
        UNREACHABLE();
    } else {
        compute_pipeline = created.value;
    }
}
/// Destroys the Vulkan objects created by the constructor: the compute pipeline, its pipeline
/// layout, the descriptor update template, the descriptor set layout and the shader module.
/// The descriptor set itself is not freed here; it is allocated from the scheduler's pool.
BlitHelper::~BlitHelper() {
device.destroyPipeline(compute_pipeline);
device.destroyPipelineLayout(compute_pipeline_layout);
device.destroyDescriptorUpdateTemplate(update_template);
device.destroyDescriptorSetLayout(descriptor_layout);
device.destroyShaderModule(compute_shader);
}
/// Records a compute dispatch that reads the depth and stencil views of `source` and writes the
/// packed result into the image view of `dest`.
/// @param source Depth-stencil surface; all of its levels are moved to
///               eDepthStencilReadOnlyOptimal before the dispatch.
/// @param dest   Destination surface; all of its levels are moved to eGeneral so that it can be
///               written as a storage image.
/// @param blit   Only src_rect is used: its left/bottom corner is pushed as the source offset
///               and its width/height determine the number of work groups.
void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
                                const VideoCore::TextureBlit& blit) {
    constexpr vk::ImageLayout read_layout = vk::ImageLayout::eDepthStencilReadOnlyOptimal;
    constexpr vk::ImageLayout write_layout = vk::ImageLayout::eGeneral;

    source.Transition(read_layout, 0, source.alloc.levels);
    dest.Transition(write_layout, 0, dest.alloc.levels);

    // Order matches the bindings of the descriptor set layout: depth, stencil, output image
    const std::array image_infos = {
        vk::DescriptorImageInfo{.imageView = source.GetDepthView(), .imageLayout = read_layout},
        vk::DescriptorImageInfo{.imageView = source.GetStencilView(), .imageLayout = read_layout},
        vk::DescriptorImageInfo{.imageView = dest.GetImageView(), .imageLayout = write_layout}};

    // A fresh descriptor set is allocated from the scheduler's pool on every call
    const vk::DescriptorSetAllocateInfo set_alloc_info = {
        .descriptorPool = scheduler.GetDescriptorPool(),
        .descriptorSetCount = 1,
        .pSetLayouts = &descriptor_layout};

    descriptor_set = device.allocateDescriptorSets(set_alloc_info)[0];
    device.updateDescriptorSetWithTemplate(descriptor_set, update_template, image_infos.front());

    vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
    cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, 1,
                              &descriptor_set, 0, nullptr);
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);

    const auto offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
    cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
                         sizeof(Common::Vec2i), offset.AsArray());

    // NOTE(review): the division truncates, so a rectangle whose width or height is not a
    // multiple of 8 leaves its trailing texels unprocessed - confirm callers always pass
    // 8-aligned rectangles.
    const auto groups_x = blit.src_rect.GetWidth() / 8;
    const auto groups_y = blit.src_rect.GetHeight() / 8;
    cmdbuf.dispatch(groups_x, groups_y, 1);
}
} // namespace Vulkan

View File

@@ -0,0 +1,39 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
struct TextureBlit;
}
namespace Vulkan {
class Instance;
class TaskScheduler;
class Surface;
/// Owns a compute pipeline that converts D24S8 depth-stencil data into packed 32-bit values.
class BlitHelper {
public:
/// Builds the shader, layouts, update template and compute pipeline on the instance's device
BlitHelper(const Instance& instance, TaskScheduler& scheduler);
/// Destroys every Vulkan object created by the constructor
~BlitHelper();
/// Records a compute dispatch that reads the depth and stencil views of depth_surface and
/// stores the packed result in the image view of r32_surface. Only blit.src_rect is consulted.
void BlitD24S8ToR32(Surface& depth_surface, Surface& r32_surface,
const VideoCore::TextureBlit& blit);
private:
// Provides the descriptor pool and the command buffer used when recording the blit
TaskScheduler& scheduler;
// Device handle obtained from the Instance passed to the constructor
vk::Device device;
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
// Overwritten with a newly allocated set on every BlitD24S8ToR32 call
vk::DescriptorSet descriptor_set;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
};
} // namespace Vulkan

View File

@@ -124,10 +124,8 @@ D24S8toRGBA8::~D24S8toRGBA8() {
void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest, void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest,
VideoCore::Rect2D dst_rect) { VideoCore::Rect2D dst_rect) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels);
runtime.Transition(command_buffer, source.alloc, vk::ImageLayout::eDepthStencilReadOnlyOptimal, dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
0, source.alloc.levels);
runtime.Transition(command_buffer, dest.alloc, vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{.imageView = source.GetDepthView(), vk::DescriptorImageInfo{.imageView = source.GetDepthView(),
@@ -145,6 +143,8 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
descriptor_set = device.allocateDescriptorSets(alloc_info)[0]; descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
1, &descriptor_set, 0, nullptr); 1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);

View File

@@ -0,0 +1,80 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class LayoutTracker {
static constexpr u32 LAYOUT_BITS = 3;
static constexpr u32 MAX_LAYOUTS = (1 << LAYOUT_BITS);
static constexpr u32 LAYOUT_MASK = MAX_LAYOUTS - 1;
// Build layout pattern masks at compile time for fast range equality checks
static constexpr auto LAYOUT_PATTERNS = []() {
std::array<u64, MAX_LAYOUTS> patterns{};
for (u32 layout = 0; layout < MAX_LAYOUTS; layout++) {
for (u32 i = 0; i < 16; i++) {
patterns[layout] <<= LAYOUT_BITS;
patterns[layout] |= layout;
}
}
return patterns;
}();
public:
LayoutTracker() = default;
/// Returns the image layout of the provided level
[[nodiscard]] constexpr vk::ImageLayout GetLayout(u32 level) const {
const u32 shift = level * LAYOUT_BITS;
return static_cast<vk::ImageLayout>((layouts >> shift) & LAYOUT_MASK);
}
/// Returns true if the level and layer range provided has the same layout
[[nodiscard]] constexpr bool IsRangeEqual(vk::ImageLayout layout, u32 level,
u32 level_count) const {
const u32 shift = level * LAYOUT_BITS;
const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1;
const u64 pattern = LAYOUT_PATTERNS[static_cast<u64>(layout)];
return ((layouts >> shift) & range_mask) == (pattern & range_mask);
}
/// Sets the image layout of the provided level
constexpr void SetLayout(vk::ImageLayout layout, u32 level, u32 level_count = 1) {
const u32 shift = level * LAYOUT_BITS;
const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1;
const u64 pattern = LAYOUT_PATTERNS[static_cast<u64>(layout)];
layouts &= ~(range_mask << shift);
layouts |= (pattern & range_mask) << shift;
}
/// Calls func for each continuous layout range
template <typename T>
void ForEachLayoutRange(u32 level, u32 level_count, vk::ImageLayout new_layout, T&& func) {
u32 start_level = level;
u32 end_level = level + level_count;
auto current_layout = GetLayout(level);
while (level < end_level) {
level++;
const auto layout = GetLayout(level);
if (layout != current_layout || level == end_level) {
if (current_layout != new_layout) {
func(start_level, level - start_level, current_layout);
}
current_layout = layout;
start_level = level;
}
}
}
public:
u64 layouts{};
};
} // namespace Vulkan

View File

@@ -105,12 +105,26 @@ constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat};
constexpr std::array TEXTURE_BUFFER_FORMATS = {vk::Format::eR32G32Sfloat, constexpr std::array TEXTURE_BUFFER_FORMATS = {vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32A32Sfloat}; vk::Format::eR32G32B32A32Sfloat};
constexpr VideoCore::SurfaceParams NULL_PARAMS = {.width = 1,
.height = 1,
.stride = 1,
.texture_type = VideoCore::TextureType::Texture2D,
.pixel_format = VideoCore::PixelFormat::RGBA8,
.type = VideoCore::SurfaceType::Color};
constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst;
constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage;
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
TaskScheduler& scheduler, TextureRuntime& runtime, TaskScheduler& scheduler, TextureRuntime& runtime,
RenderpassCache& renderpass_cache) RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, runtime{runtime}, : instance{instance}, scheduler{scheduler}, runtime{runtime},
renderpass_cache{renderpass_cache}, res_cache{*this, runtime}, renderpass_cache{renderpass_cache}, res_cache{*this, runtime},
pipeline_cache{instance, scheduler, renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Uint, NULL_STORAGE_USAGE, runtime},
vertex_buffer{ vertex_buffer{
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
uniform_buffer{ uniform_buffer{
@@ -122,8 +136,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
// Create a 1x1 clear texture to use in the NULL case, null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
CreateDefaultTextures(); null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1);
uniform_block_data.lighting_lut_dirty.fill(true); uniform_block_data.lighting_lut_dirty.fill(true);
@@ -156,12 +170,12 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1)); pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1));
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
pipeline_cache.BindTexture(i, default_texture.image_view); pipeline_cache.BindTexture(i, null_surface.GetImageView());
pipeline_cache.BindSampler(i, default_sampler); pipeline_cache.BindSampler(i, default_sampler);
} }
for (u32 i = 0; i < 7; i++) { for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, default_storage_texture.image_view); pipeline_cache.BindStorageImage(i, null_storage_surface.GetImageView());
} }
// Explicitly call the derived version to avoid warnings about calling virtual // Explicitly call the derived version to avoid warnings about calling virtual
@@ -173,7 +187,6 @@ RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown); scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice(); vk::Device device = instance.GetDevice();
for (auto& [key, sampler] : samplers) { for (auto& [key, sampler] : samplers) {
@@ -184,10 +197,6 @@ RasterizerVulkan::~RasterizerVulkan() {
device.destroyFramebuffer(framebuffer); device.destroyFramebuffer(framebuffer);
} }
vmaDestroyImage(allocator, default_texture.image, default_texture.allocation);
vmaDestroyImage(allocator, default_storage_texture.image, default_storage_texture.allocation);
device.destroyImageView(default_texture.image_view);
device.destroyImageView(default_storage_texture.image_view);
device.destroySampler(default_sampler); device.destroySampler(default_sampler);
} }
@@ -672,9 +681,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const u32 binding = static_cast<u32>(face); const u32 binding = static_cast<u32>(face);
if (surface != nullptr) { if (surface != nullptr) {
pipeline_cache.BindStorageImage(binding, surface->alloc.image_view); pipeline_cache.BindStorageImage(binding, surface->GetImageView());
} else { } else {
pipeline_cache.BindStorageImage(binding, default_storage_texture.image_view); pipeline_cache.BindStorageImage(binding, null_storage_surface.GetImageView());
} }
}; };
@@ -718,7 +727,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
if (surface != nullptr) { if (surface != nullptr) {
pipeline_cache.BindStorageImage(0, surface->GetImageView()); pipeline_cache.BindStorageImage(0, surface->GetImageView());
} else { } else {
pipeline_cache.BindStorageImage(0, default_storage_texture.image_view); pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView());
} }
continue; continue;
} }
@@ -748,12 +757,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureCube(config); auto surface = res_cache.GetTextureCube(config);
if (surface != nullptr) { if (surface != nullptr) {
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc, surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
vk::ImageLayout::eShaderReadOnlyOptimal, 0, surface->alloc.levels);
surface->alloc.levels, 0, 6); pipeline_cache.BindTexture(3, surface->GetImageView());
pipeline_cache.BindTexture(3, surface->alloc.image_view);
} else { } else {
pipeline_cache.BindTexture(3, default_texture.image_view); pipeline_cache.BindTexture(3, null_surface.GetImageView());
} }
BindSampler(3, texture_cube_sampler, texture.config); BindSampler(3, texture_cube_sampler, texture.config);
@@ -769,9 +777,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureSurface(texture); auto surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) { if (surface != nullptr) {
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc, surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
vk::ImageLayout::eShaderReadOnlyOptimal, 0, surface->alloc.levels);
surface->alloc.levels);
CheckBarrier(surface->alloc.image_view, texture_index); CheckBarrier(surface->alloc.image_view, texture_index);
} else { } else {
// Can occur when texture addr is null or its memory is unmapped/invalid // Can occur when texture addr is null or its memory is unmapped/invalid
@@ -781,10 +788,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// the geometry in question. // the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black. // on the male character's face, which in the OpenGL default appear black.
pipeline_cache.BindTexture(texture_index, default_texture.image_view); pipeline_cache.BindTexture(texture_index, null_surface.GetImageView());
} }
} else { } else {
pipeline_cache.BindTexture(texture_index, default_texture.image_view); pipeline_cache.BindTexture(texture_index, null_surface.GetImageView());
pipeline_cache.BindSampler(texture_index, default_sampler); pipeline_cache.BindSampler(texture_index, default_sampler);
} }
} }
@@ -830,17 +837,12 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
it->second = CreateFramebuffer(framebuffer_info); it->second = CreateFramebuffer(framebuffer_info);
} }
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (color_surface) { if (color_surface) {
runtime.Transition(command_buffer, color_surface->alloc, color_surface->Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
vk::ImageLayout::eColorAttachmentOptimal, 0,
color_surface->alloc.levels);
} }
if (depth_surface) { if (depth_surface) {
runtime.Transition(command_buffer, depth_surface->alloc, depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
vk::ImageLayout::eDepthStencilAttachmentOptimal, 0,
depth_surface->alloc.levels);
} }
const vk::RenderPassBeginInfo renderpass_begin = { const vk::RenderPassBeginInfo renderpass_begin = {
@@ -866,6 +868,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info); pipeline_cache.BindPipeline(pipeline_info);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size()); const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
@@ -877,12 +881,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
vertex_buffer.Commit(vertex_size); vertex_buffer.Commit(vertex_size);
// Bind the vertex buffer at the current mapped offset. This effectively means
// that when base_vertex is zero the GPU will start drawing from the current mapped
// offset not the start of the buffer.
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
command_buffer.draw(vertices, 1, base_vertex, 0); command_buffer.draw(vertices, 1, base_vertex, 0);
} }
} }
@@ -1633,17 +1632,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
return device.createFramebuffer(framebuffer_info); return device.createFramebuffer(framebuffer_info);
} }
void RasterizerVulkan::CreateDefaultTextures() {
const vk::ImageUsageFlags usage = GetImageUsage(vk::ImageAspectFlagBits::eColor);
default_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Unorm, usage, {});
default_storage_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Uint, usage, {});
const vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, default_texture, vk::ImageLayout::eShaderReadOnlyOptimal, 0,
1);
runtime.Transition(command_buffer, default_storage_texture, vk::ImageLayout::eGeneral, 0, 1);
}
void RasterizerVulkan::FlushBuffers() { void RasterizerVulkan::FlushBuffers() {
vertex_buffer.Flush(); vertex_buffer.Flush();
uniform_buffer.Flush(); uniform_buffer.Flush();

View File

@@ -249,8 +249,6 @@ private:
/// Creates a new Vulkan framebuffer object /// Creates a new Vulkan framebuffer object
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info); vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);
void CreateDefaultTextures();
private: private:
const Instance& instance; const Instance& instance;
TaskScheduler& scheduler; TaskScheduler& scheduler;
@@ -280,8 +278,8 @@ private:
std::vector<HardwareVertex> vertex_batch; std::vector<HardwareVertex> vertex_batch;
std::array<u64, 16> binding_offsets{}; std::array<u64, 16> binding_offsets{};
vk::Sampler default_sampler; vk::Sampler default_sampler;
ImageAlloc default_texture; Surface null_surface;
ImageAlloc default_storage_texture; Surface null_storage_surface;
struct { struct {
Pica::Shader::UniformData data{}; Pica::Shader::UniformData data{};

View File

@@ -35,7 +35,9 @@ constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler, TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache) RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{
instance,
scheduler} {
for (auto& buffer : staging_buffers) { for (auto& buffer : staging_buffers) {
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE, buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
@@ -77,7 +79,10 @@ TextureRuntime::~TextureRuntime() {
StagingData TextureRuntime::FindStaging(u32 size, bool upload) { StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const u32 current_slot = scheduler.GetCurrentSlotIndex(); const u32 current_slot = scheduler.GetCurrentSlotIndex();
const u32 offset = staging_offsets[current_slot]; u32& offset = staging_offsets[current_slot];
// Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
offset = Common::AlignUp(offset, 4);
if (offset + size > STAGING_BUFFER_SIZE) { if (offset + size > STAGING_BUFFER_SIZE) {
LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!"); LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
UNREACHABLE(); UNREACHABLE();
@@ -100,19 +105,6 @@ void TextureRuntime::OnSlotSwitch(u32 new_slot) {
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) { VideoCore::TextureType type) {
const u32 levels = std::bit_width(std::max(width, height));
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const VideoCore::HostTextureTag key = {
.format = format, .width = width, .height = height, .layers = layers};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
const FormatTraits traits = instance.GetTraits(format); const FormatTraits traits = instance.GetTraits(format);
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format)); const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
@@ -121,23 +113,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
const vk::Format vk_format = is_suitable ? traits.native : traits.fallback; const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect); const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect);
return Allocate(width, height, type, vk_format, vk_usage);
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::TextureType type,
vk::Format format, vk::ImageUsageFlags usage) {
ImageAlloc alloc{};
alloc.format = format;
alloc.levels = std::bit_width(std::max(width, height));
alloc.layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
alloc.aspect = GetImageAspect(format);
const HostTextureTag key = {.format = format, .type = type, .width = width, .height = height};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
const vk::ImageCreateFlags flags = type == VideoCore::TextureType::CubeMap const vk::ImageCreateFlags flags = type == VideoCore::TextureType::CubeMap
? vk::ImageCreateFlagBits::eCubeCompatible ? vk::ImageCreateFlagBits::eCubeCompatible
: vk::ImageCreateFlags{}; : vk::ImageCreateFlags{};
return Allocate(width, height, layers, levels, vk_format, vk_usage, flags);
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 levels,
vk::Format format, vk::ImageUsageFlags usage,
vk::ImageCreateFlags flags) {
const vk::ImageAspectFlags aspect = GetImageAspect(format);
const vk::ImageCreateInfo image_info = {.flags = flags, const vk::ImageCreateInfo image_info = {.flags = flags,
.imageType = vk::ImageType::e2D, .imageType = vk::ImageType::e2D,
.format = format, .format = format,
.extent = {width, height, 1}, .extent = {width, height, 1},
.mipLevels = levels, .mipLevels = alloc.levels,
.arrayLayers = layers, .arrayLayers = alloc.layers,
.samples = vk::SampleCountFlagBits::e1, .samples = vk::SampleCountFlagBits::e1,
.usage = usage}; .usage = usage};
@@ -145,79 +150,66 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 level
VkImage unsafe_image{}; VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info); VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocation allocation;
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &allocation, nullptr); &unsafe_image, &alloc.allocation, nullptr);
if (result != VK_SUCCESS) [[unlikely]] { if (result != VK_SUCCESS) [[unlikely]] {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
UNREACHABLE(); UNREACHABLE();
} }
const vk::ImageViewType view_type = flags & vk::ImageCreateFlagBits::eCubeCompatible const vk::ImageViewType view_type =
? vk::ImageViewType::eCube type == VideoCore::TextureType::CubeMap ? vk::ImageViewType::eCube : vk::ImageViewType::e2D;
: vk::ImageViewType::e2D;
vk::Image image = vk::Image{unsafe_image}; alloc.image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {.image = image, const vk::ImageViewCreateInfo view_info = {.image = alloc.image,
.viewType = view_type, .viewType = view_type,
.format = format, .format = format,
.subresourceRange = {.aspectMask = aspect, .subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = 0, .baseMipLevel = 0,
.levelCount = levels, .levelCount = alloc.levels,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = layers}}; .layerCount = alloc.layers}};
vk::Device device = instance.GetDevice(); vk::Device device = instance.GetDevice();
vk::ImageView image_view = device.createImageView(view_info); alloc.image_view = device.createImageView(view_info);
// Also create a base mip view in case this is used as an attachment // Also create a base mip view in case this is used as an attachment
vk::ImageView base_view; if (alloc.levels > 1) [[likely]] {
if (levels > 1) [[likely]] { const vk::ImageViewCreateInfo base_view_info = {
const vk::ImageViewCreateInfo base_view_info = {.image = image, .image = alloc.image,
.viewType = view_type, .viewType = view_type,
.format = format, .format = format,
.subresourceRange = {.aspectMask = aspect, .subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = 0, .baseMipLevel = 0,
.levelCount = 1, .levelCount = 1,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = layers}}; .layerCount = alloc.layers}};
base_view = device.createImageView(base_view_info); alloc.base_view = device.createImageView(base_view_info);
} }
// Create separate depth/stencil views in case this gets reinterpreted with a compute shader // Create separate depth/stencil views in case this gets reinterpreted with a compute shader
vk::ImageView depth_view; if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
vk::ImageView stencil_view;
if (aspect & vk::ImageAspectFlagBits::eStencil) {
vk::ImageViewCreateInfo view_info = { vk::ImageViewCreateInfo view_info = {
.image = image, .image = alloc.image,
.viewType = view_type, .viewType = view_type,
.format = format, .format = format,
.subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eDepth, .subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eDepth,
.baseMipLevel = 0, .baseMipLevel = 0,
.levelCount = levels, .levelCount = alloc.levels,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = layers}}; .layerCount = alloc.layers}};
depth_view = device.createImageView(view_info); alloc.depth_view = device.createImageView(view_info);
view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil; view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil;
stencil_view = device.createImageView(view_info); alloc.stencil_view = device.createImageView(view_info);
} }
return ImageAlloc{.image = image, return alloc;
.image_view = image_view,
.base_view = base_view,
.depth_view = depth_view,
.stencil_view = stencil_view,
.allocation = allocation,
.format = format,
.aspect = aspect,
.levels = levels,
.layers = layers};
} }
/// Takes ownership of `alloc` and stores it in `texture_recycler` under `tag`.
/// The recycling lookup in Allocate searches `texture_recycler` by tag and hands
/// the stored allocation back out instead of creating a new image.
void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) { void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) {
texture_recycler.emplace(tag, std::move(alloc)); texture_recycler.emplace(tag, std::move(alloc));
} }
@@ -239,6 +231,8 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
} }
} else { } else {
switch (surface.pixel_format) { switch (surface.pixel_format) {
case VideoCore::PixelFormat::RGBA8:
return Pica::Texture::ConvertABGRToRGBA(source, dest);
case VideoCore::PixelFormat::RGBA4: case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest); return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
case VideoCore::PixelFormat::RGB8: case VideoCore::PixelFormat::RGB8:
@@ -258,10 +252,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
const vk::ImageAspectFlags aspect = ToVkAspect(surface.type); const vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); surface.Transition(vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
surface.alloc.levels, 0,
surface.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
vk::ClearValue clear_value{}; vk::ClearValue clear_value{};
if (aspect & vk::ImageAspectFlagBits::eColor) { if (aspect & vk::ImageAspectFlagBits::eColor) {
@@ -282,6 +273,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = 1}; .layerCount = 1};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (aspect & vk::ImageAspectFlagBits::eColor) { if (aspect & vk::ImageAspectFlagBits::eColor) {
command_buffer.clearColorImage(surface.alloc.image, command_buffer.clearColorImage(surface.alloc.image,
vk::ImageLayout::eTransferDstOptimal, clear_value.color, vk::ImageLayout::eTransferDstOptimal, clear_value.color,
@@ -295,24 +287,22 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
} else { } else {
// For partial clears we begin a clear renderpass with the appropriate render area // For partial clears we begin a clear renderpass with the appropriate render area
vk::RenderPass clear_renderpass{}; vk::RenderPass clear_renderpass{};
ImageAlloc& alloc = surface.alloc;
if (aspect & vk::ImageAspectFlagBits::eColor) { if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_renderpass = renderpass_cache.GetRenderpass( clear_renderpass = renderpass_cache.GetRenderpass(
surface.pixel_format, VideoCore::PixelFormat::Invalid, true); surface.pixel_format, VideoCore::PixelFormat::Invalid, true);
Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0, surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
alloc.levels);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || } else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) { aspect & vk::ImageAspectFlagBits::eStencil) {
clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true); surface.pixel_format, true);
Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
alloc.levels);
} }
const vk::ImageView framebuffer_view = surface.GetFramebufferView();
auto [it, new_framebuffer] = auto [it, new_framebuffer] =
clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{}); clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{});
if (new_framebuffer) { if (new_framebuffer) {
const vk::ImageView framebuffer_view = surface.GetFramebufferView();
const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass, const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass,
.attachmentCount = 1, .attachmentCount = 1,
.pAttachments = &framebuffer_view, .pAttachments = &framebuffer_view,
@@ -345,6 +335,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) { const VideoCore::TextureCopy& copy) {
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
const vk::ImageCopy image_copy = { const vk::ImageCopy image_copy = {
.srcSubresource = {.aspectMask = ToVkAspect(source.type), .srcSubresource = {.aspectMask = ToVkAspect(source.type),
.mipLevel = copy.src_level, .mipLevel = copy.src_level,
@@ -359,11 +352,6 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.extent = {copy.extent.width, copy.extent.height, 1}}; .extent = {copy.extent.width, copy.extent.height, 1}};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0,
source.alloc.levels);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
dest.alloc.levels);
command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy); dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
@@ -374,12 +362,8 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) { const VideoCore::TextureBlit& blit) {
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
source.alloc.levels, 0,
source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left), const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left),
static_cast<s32>(blit.src_rect.bottom), 0}, static_cast<s32>(blit.src_rect.bottom), 0},
@@ -402,6 +386,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
.layerCount = 1}, .layerCount = 1},
.dstOffsets = dest_offsets}; .dstOffsets = dest_offsets};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area,
vk::Filter::eNearest); vk::Filter::eNearest);
@@ -420,8 +405,8 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
vk::ImageAspectFlags aspect = ToVkAspect(surface.type); vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) { for (u32 i = 1; i < levels; i++) {
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1); surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {vk::Offset3D{0, 0, 0}, const std::array source_offsets = {vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}}; vk::Offset3D{current_width, current_height, 1}};
@@ -461,9 +446,9 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
} }
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer, vk::ImageLayout new_layout, u32 level, u32 level_count) {
u32 layer_count) { LayoutTracker& tracker = alloc.tracker;
if (new_layout == alloc.layout || !alloc.image) { if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) {
return; return;
} }
@@ -540,28 +525,33 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
return info; return info;
}; };
LayoutInfo source = GetLayoutInfo(alloc.layout);
LayoutInfo dest = GetLayoutInfo(new_layout); LayoutInfo dest = GetLayoutInfo(new_layout);
tracker.ForEachLayoutRange(
level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) {
LayoutInfo source = GetLayoutInfo(old_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = start,
.levelCount = count,
.baseArrayLayer = 0,
.layerCount = alloc.layers}};
const vk::ImageMemoryBarrier barrier = { command_buffer.pipelineBarrier(source.stage, dest.stage,
.srcAccessMask = source.access, vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
.dstAccessMask = dest.access, });
.oldLayout = alloc.layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = /*level*/ 0,
.levelCount = /*level_count*/ alloc.levels,
.baseArrayLayer = layer,
.layerCount = layer_count}};
command_buffer.pipelineBarrier(source.stage, dest.stage, vk::DependencyFlagBits::eByRegion, {}, tracker.SetLayout(new_layout, level, level_count);
{}, barrier); for (u32 i = 0; i < level_count; i++) {
ASSERT(alloc.tracker.GetLayout(level + i) == new_layout);
alloc.layout = new_layout; }
} }
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) Surface::Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()}, : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} { scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} {
@@ -571,36 +561,52 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
} }
} }
/// Builds a surface backed by an image of an explicitly chosen Vulkan format and usage,
/// instead of deriving them from the pixel format of `params`.
/// When `format` is vk::Format::eUndefined no image is allocated and `alloc` keeps its
/// default-constructed state.
Surface::Surface(const VideoCore::SurfaceParams& params, vk::Format format,
                 vk::ImageUsageFlags usage, TextureRuntime& runtime)
    : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
      scheduler{runtime.GetScheduler()} {
    // Nothing to allocate for an undefined format
    if (format == vk::Format::eUndefined) {
        return;
    }

    // The backing image is sized with the scaled dimensions of the surface
    alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), texture_type, format, usage);
}
/// Returns the backing allocation to the runtime so a later surface with the same
/// format, texture type and scaled dimensions can reuse it.
Surface::~Surface() {
    // Surfaces with an invalid pixel format never participate in recycling.
    // Surfaces that never received an image (e.g. constructed with an undefined
    // vk::Format) must not be recycled either: that would store an image-less
    // allocation in the recycler under a tag built from an unset format.
    if (pixel_format == VideoCore::PixelFormat::Invalid || !alloc.image) {
        return;
    }

    // The tag must match the key Allocate builds when it searches the recycler
    const HostTextureTag tag = {.format = alloc.format,
                                .type = texture_type,
                                .width = GetScaledWidth(),
                                .height = GetScaledHeight()};
    runtime.Recycle(tag, std::move(alloc));
}
/// Transitions `level_count` mip levels starting at `level` to `new_layout`.
/// Forwards to TextureRuntime::Transition using the scheduler's render command buffer
/// and this surface's allocation.
void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) {
    runtime.Transition(scheduler.GetRenderCommandBuffer(), alloc, new_layout, level,
                       level_count);
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload); MICROPROFILE_SCOPE(Vulkan_Upload);
if (type == VideoCore::SurfaceType::DepthStencil) {
LOG_ERROR(Render_Vulkan, "Depth upload unimplemented, ignoring");
return;
}
runtime.renderpass_cache.ExitRenderpass(); runtime.renderpass_cache.ExitRenderpass();
const bool is_scaled = res_scale != 1; const bool is_scaled = res_scale != 1;
if (is_scaled) { if (is_scaled) {
LOG_ERROR(Render_Vulkan, "Unimplemented scaled upload!"); ScaledUpload(upload, staging);
ScaledUpload(upload);
} else { } else {
u32 region_count = 0; u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions; std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect; const VideoCore::Rect2D rect = upload.texture_rect;
vk::BufferImageCopy copy_region = { vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset, .bufferOffset = staging.buffer_offset + upload.buffer_offset,
.bufferRowLength = rect.GetWidth(), .bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(), .bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = alloc.aspect, .imageSubresource = {.aspectMask = alloc.aspect,
@@ -623,9 +629,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
} }
} }
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
alloc.levels, 0,
texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyBufferToImage(staging.buffer, alloc.image, command_buffer.copyBufferToImage(staging.buffer, alloc.image,
vk::ImageLayout::eTransferDstOptimal, region_count, vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data()); copy_regions.data());
@@ -644,17 +650,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
runtime.renderpass_cache.ExitRenderpass(); runtime.renderpass_cache.ExitRenderpass();
// For depth stencil downloads always use the compute shader fallback
// to avoid having to interleave the data later. These should(?) be
// uncommon anyways and the perf hit is very small
if (type == VideoCore::SurfaceType::DepthStencil) {
return DepthStencilDownload(download, staging);
}
const bool is_scaled = res_scale != 1; const bool is_scaled = res_scale != 1;
if (is_scaled) { if (is_scaled) {
LOG_ERROR(Render_Vulkan, "Unimplemented scaled download!"); ScaledDownload(download, staging);
ScaledDownload(download);
} else { } else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = download.texture_rect; const VideoCore::Rect2D rect = download.texture_rect;
vk::BufferImageCopy copy_region = { const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + download.buffer_offset, .bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(), .bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(), .bufferImageHeight = rect.GetHeight(),
@@ -665,25 +673,12 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0}, .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
copy_regions[region_count++] = copy_region;
} else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += 4 * staging.size / 5;
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, 0,
alloc.levels);
// Copy pixel data to the staging buffer // Copy pixel data to the staging buffer
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data()); staging.buffer, copy_region);
} }
// Lock this data until the next scheduler switch // Lock this data until the next scheduler switch
@@ -692,66 +687,127 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
} }
/// Returns the number of bytes reserved per pixel for the backing image format.
u32 Surface::GetInternalBytesPerPixel() const {
    // D24S8 reports 5 bytes rather than its block size; the additional byte
    // gives room to work with when the data gets deinterleaved
    const bool is_d24s8 = alloc.format == vk::Format::eD24UnormS8Uint;
    if (!is_d24s8) {
        return vk::blockSize(alloc.format);
    }

    return 5;
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
/*const u32 rect_width = download.texture_rect.GetWidth(); const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type,
texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
}
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
/*const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight(); const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale; const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type,
texture);
// If filtering fails, resort to normal blitting SurfaceParams unscaled_params = *this;
glBlitFramebuffer(0, 0, rect_width, rect_height, unscaled_params.width = rect_width;
upload.texture_rect.left, upload.texture_rect.bottom, unscaled_params.stride = rect_width;
upload.texture_rect.right, upload.texture_rect.top, unscaled_params.height = rect_height;
MakeBufferMask(type), GL_LINEAR); unscaled_params.res_scale = 1;
}*/ Surface unscaled_surface{unscaled_params, runtime};
const VideoCore::BufferTextureCopy unscaled_upload = {.buffer_offset = upload.buffer_offset,
.buffer_size = upload.buffer_size,
.texture_rect = unscaled_rect};
unscaled_surface.Upload(unscaled_upload, staging);
const VideoCore::TextureBlit blit = {.src_level = 0,
.dst_level = upload.texture_level,
.src_layer = 0,
.dst_layer = 0,
.src_rect = unscaled_rect,
.dst_rect = scaled_rect};
runtime.BlitTextures(unscaled_surface, *this, blit);
}
/// Downloads a region of a resolution-scaled surface.
/// The scaled region is first blitted into a temporary surface with res_scale 1 that is
/// exactly as large as the requested rectangle, and that temporary surface is then
/// downloaded to the staging buffer.
/// @param download Region, mip level and buffer placement requested by the caller
/// @param staging  Staging buffer that receives the pixel data
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download,
                             const StagingData& staging) {
    const u32 width = download.texture_rect.GetWidth();
    const u32 height = download.texture_rect.GetHeight();

    // Source region in scaled pixels, destination covers the whole temporary surface
    const VideoCore::Rect2D src_rect = download.texture_rect * res_scale;
    const VideoCore::Rect2D dst_rect = VideoCore::Rect2D{0, height, width, 0};

    // The temporary surface inherits every parameter except size and scale
    SurfaceParams temp_params = *this;
    temp_params.res_scale = 1;
    temp_params.width = width;
    temp_params.height = height;
    temp_params.stride = width;
    Surface temp_surface{temp_params, runtime};

    // Downscale the requested mip level into level 0 of the temporary surface
    const VideoCore::TextureBlit downscale = {.src_level = download.texture_level,
                                              .dst_level = 0,
                                              .src_layer = 0,
                                              .dst_layer = 0,
                                              .src_rect = src_rect,
                                              .dst_rect = dst_rect};
    runtime.BlitTextures(*this, temp_surface, downscale);

    // Read back the temporary surface, keeping the buffer placement of the original request
    const VideoCore::BufferTextureCopy temp_download = {.buffer_offset = download.buffer_offset,
                                                        .buffer_size = download.buffer_size,
                                                        .texture_rect = dst_rect,
                                                        .texture_level = 0};
    temp_surface.Download(temp_download, staging);
}
/// Downloads a region of a depth-stencil surface through an intermediate R32Uint surface.
/// The region is converted with BlitHelper::BlitD24S8ToR32 into mip level 0 of the
/// intermediate surface. If the surface is resolution-scaled, level 0 is blitted into
/// level 1 to bring it down to the requested size, and the resulting level is downloaded.
/// @param download Region, mip level and buffer placement requested by the caller
/// @param staging  Staging buffer that receives the pixel data
void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
                                   const StagingData& staging) {
    const bool is_scaled = res_scale != 1;
    const u32 width = download.texture_rect.GetWidth();
    const u32 height = download.texture_rect.GetHeight();

    // Requested region in scaled pixels of this surface
    const VideoCore::Rect2D src_rect = download.texture_rect * res_scale;
    // Same extents, but anchored at the origin of the intermediate surface
    const VideoCore::Rect2D full_rect =
        VideoCore::Rect2D{0, src_rect.GetHeight(), src_rect.GetWidth(), 0};
    const VideoCore::Rect2D final_rect = VideoCore::Rect2D{0, height, width, 0};

    // The intermediate surface is a color surface as large as the scaled region
    SurfaceParams temp_params = *this;
    temp_params.type = VideoCore::SurfaceType::Color;
    temp_params.res_scale = 1;
    temp_params.width = src_rect.GetWidth();
    temp_params.height = src_rect.GetHeight();
    temp_params.stride = src_rect.GetWidth();

    const vk::ImageUsageFlags temp_usage = vk::ImageUsageFlagBits::eTransferSrc |
                                           vk::ImageUsageFlagBits::eTransferDst |
                                           vk::ImageUsageFlagBits::eStorage;
    Surface temp_surface{temp_params, vk::Format::eR32Uint, temp_usage, runtime};

    // Convert the depth-stencil data into level 0 of the R32Uint surface
    const VideoCore::TextureBlit convert = {.src_level = download.texture_level,
                                            .dst_level = 0,
                                            .src_layer = 0,
                                            .dst_layer = 0,
                                            .src_rect = src_rect,
                                            .dst_rect = full_rect};
    runtime.blit_helper.BlitD24S8ToR32(*this, temp_surface, convert);

    // Blit the upper mip level to the lower one to scale without additional allocations
    if (is_scaled) {
        const VideoCore::TextureBlit downscale = {.src_level = 0,
                                                  .dst_level = 1,
                                                  .src_layer = 0,
                                                  .dst_layer = 0,
                                                  .src_rect = full_rect,
                                                  .dst_rect = final_rect};
        runtime.BlitTextures(temp_surface, temp_surface, downscale);
    }

    // Level 1 holds the downscaled data, level 0 is already final for unscaled surfaces
    const u32 download_level = is_scaled ? 1u : 0u;
    const VideoCore::BufferTextureCopy temp_download = {.buffer_offset = download.buffer_offset,
                                                        .buffer_size = download.buffer_size,
                                                        .texture_rect = final_rect,
                                                        .texture_level = download_level};
    temp_surface.Download(temp_download, staging);
}
} // namespace Vulkan } // namespace Vulkan

View File

@@ -9,8 +9,10 @@
#include <vulkan/vulkan_hash.hpp> #include <vulkan/vulkan_hash.hpp>
#include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h" #include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h"
@@ -24,6 +26,14 @@ struct StagingData {
}; };
struct ImageAlloc { struct ImageAlloc {
ImageAlloc() = default;
ImageAlloc(const ImageAlloc&) = delete;
ImageAlloc& operator=(const ImageAlloc&) = delete;
ImageAlloc(ImageAlloc&&) = default;
ImageAlloc& operator=(ImageAlloc&&) = default;
vk::Image image; vk::Image image;
vk::ImageView image_view; vk::ImageView image_view;
vk::ImageView base_view; vk::ImageView base_view;
@@ -32,12 +42,38 @@ struct ImageAlloc {
VmaAllocation allocation; VmaAllocation allocation;
vk::ImageUsageFlags usage; vk::ImageUsageFlags usage;
vk::Format format; vk::Format format;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor; vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor;
u32 levels = 1; u32 levels = 1;
u32 layers = 1; u32 layers = 1;
LayoutTracker tracker;
}; };
/// Key identifying a host texture by its format, texture type and dimensions.
/// Comparable through the defaulted three-way comparison and hashable through Hash().
struct HostTextureTag {
    vk::Format format = vk::Format::eUndefined;
    VideoCore::TextureType type = VideoCore::TextureType::Texture2D;
    u32 width = 1;
    u32 height = 1;

    auto operator<=>(const HostTextureTag&) const noexcept = default;

    /// Hashes the tag by handing this object and sizeof(HostTextureTag) to
    /// Common::ComputeHash64.
    /// NOTE(review): this presumably hashes the raw object bytes, which is only
    /// consistent with operator<=> if the struct has no padding - confirm when
    /// adding or reordering members.
    u64 Hash() const {
        return Common::ComputeHash64(this, sizeof(HostTextureTag));
    }
};
} // namespace Vulkan
namespace std {

/// std::hash specialization for Vulkan::HostTextureTag, allowing the tag to be
/// used as a key in unordered containers.
template <>
struct hash<Vulkan::HostTextureTag> {
    /// Forwards to the tag's own Hash() member and converts the result to std::size_t.
    std::size_t operator()(const Vulkan::HostTextureTag& key) const noexcept {
        const auto digest = key.Hash();
        return static_cast<std::size_t>(digest);
    }
};

} // namespace std
namespace Vulkan {
class Instance; class Instance;
class RenderpassCache; class RenderpassCache;
class Surface; class Surface;
@@ -62,15 +98,14 @@ public:
VideoCore::TextureType type); VideoCore::TextureType type);
/// Allocates a vulkan image /// Allocates a vulkan image
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, u32 layers, u32 levels, [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::TextureType type,
vk::Format format, vk::ImageUsageFlags usage, vk::Format format, vk::ImageUsageFlags usage);
vk::ImageCreateFlags flags);
/// Causes a GPU command flush /// Causes a GPU command flush
void Finish(); void Finish();
/// Takes back ownership of the allocation for recycling /// Takes back ownership of the allocation for recycling
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc); void Recycle(const HostTextureTag tag, ImageAlloc&& alloc);
/// Performs required format conversions on the staging data /// Performs required format conversions on the staging data
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source, void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
@@ -78,7 +113,7 @@ public:
/// Transitions the mip level range of the surface to new_layout /// Transitions the mip level range of the surface to new_layout
void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout, void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout,
u32 level, u32 level_count, u32 layer = 0, u32 layer_count = 1); u32 level, u32 level_count);
/// Fills the rectangle of the texture with the clear value provided /// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
@@ -118,10 +153,11 @@ private:
const Instance& instance; const Instance& instance;
TaskScheduler& scheduler; TaskScheduler& scheduler;
RenderpassCache& renderpass_cache; RenderpassCache& renderpass_cache;
BlitHelper blit_helper;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters; std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers; std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{}; std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler; std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers; std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
}; };
@@ -130,9 +166,14 @@ class Surface : public VideoCore::SurfaceBase<Surface> {
friend class RasterizerVulkan; friend class RasterizerVulkan;
public: public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime);
Surface(const VideoCore::SurfaceParams& params, vk::Format format, vk::ImageUsageFlags usage,
TextureRuntime& runtime);
~Surface() override; ~Surface() override;
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Uploads pixel data in staging to a rectangle region of the surface texture /// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
@@ -168,14 +209,15 @@ public:
} }
private: private:
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
/// Uploads pixel data to scaled texture /// Uploads pixel data to scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload); void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
/// Overrides the image layout of the mip level range /// Downloads scaled image by downscaling the requested rectangle
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& stagings);
/// Downloads scaled depth stencil data
void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
const StagingData& staging);
private: private:
TextureRuntime& runtime; TextureRuntime& runtime;
@@ -183,7 +225,7 @@ private:
TaskScheduler& scheduler; TaskScheduler& scheduler;
public: public:
ImageAlloc alloc{}; ImageAlloc alloc;
FormatTraits traits; FormatTraits traits;
}; };