renderer_vulkan: Implement scaled uploads and downloads

* This commit includes large changes to how textures are handled. Instead of raw ImageAlloc objects, Surface is now used throughout, which provides multiple benefits: automatic recycling on destruction and the ability to use the TextureRuntime interface to simplify operations

* Layout tracking is also implemented, which allows individual mip levels to be transitioned without errors (see the sketch below)

* This fixes graphical errors in multiple games that rely on framebuffer uploads
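
A minimal sketch of the new call pattern (based on the call sites changed below; not an exhaustive example):

// Before: callers passed the raw allocation and a full mip range to the runtime
// runtime.Transition(command_buffer, surface->alloc,
//                    vk::ImageLayout::eShaderReadOnlyOptimal, 0, surface->alloc.levels);

// After: the surface transitions itself and the layout tracker elides levels
// that are already in the requested layout
surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, surface->alloc.levels);
pipeline_cache.BindTexture(3, surface->GetImageView());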
emufan4568
2022-10-16 08:17:05 +03:00
parent caf596e5eb
commit 69db7d9d0d
12 changed files with 648 additions and 287 deletions

View File

@@ -84,10 +84,13 @@ add_library(video_core STATIC
renderer_vulkan/pica_to_vk.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_blit_helper.cpp
renderer_vulkan/vk_blit_helper.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_layout_tracker.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_instance.cpp

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <optional>
#include <unordered_map>

View File

@@ -48,7 +48,7 @@ class SurfaceBase : public SurfaceParams, public std::enable_shared_from_this<S>
using Watcher = SurfaceWatcher<S>;
public:
SurfaceBase(SurfaceParams& params) : SurfaceParams{params} {}
SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {}
virtual ~SurfaceBase() = default;
/// Returns true when this surface can be used to fill the fill_interval of dest_surface

View File

@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#define GLM_FORCE_DEPTH_ZERO_TO_ONE
#include <glm/gtc/matrix_transform.hpp>
#include "common/assert.h"
@@ -190,11 +189,9 @@ RendererVulkan::~RendererVulkan() {
}
for (auto& info : screen_infos) {
const VideoCore::HostTextureTag tag = {
.format = VideoCore::PixelFormatFromGPUPixelFormat(info.texture.format),
.width = info.texture.width,
.height = info.texture.height,
.layers = 1};
const HostTextureTag tag = {.format = info.texture.alloc.format,
.width = info.texture.width,
.height = info.texture.height};
runtime.Recycle(tag, std::move(info.texture.alloc));
}
@@ -548,25 +545,21 @@ void RendererVulkan::BuildPipelines() {
void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture,
const GPU::Regs::FramebufferConfig& framebuffer) {
TextureInfo old_texture = texture;
texture = TextureInfo{
.alloc =
runtime.Allocate(framebuffer.width, framebuffer.height,
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format),
VideoCore::TextureType::Texture2D),
.width = framebuffer.width,
.height = framebuffer.height,
.format = framebuffer.color_format,
};
TextureInfo old_texture = std::move(texture);
texture = TextureInfo{.alloc = runtime.Allocate(
framebuffer.width, framebuffer.height,
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format),
VideoCore::TextureType::Texture2D),
.width = framebuffer.width,
.height = framebuffer.height,
.format = framebuffer.color_format};
// Recycle the old texture after allocation to avoid having duplicates of the same allocation in
// the recycler
if (old_texture.width != 0 && old_texture.height != 0) {
const VideoCore::HostTextureTag tag = {
.format = VideoCore::PixelFormatFromGPUPixelFormat(old_texture.format),
.width = old_texture.width,
.height = old_texture.height,
.layers = 1};
const HostTextureTag tag = {.format = old_texture.alloc.format,
.width = old_texture.width,
.height = old_texture.height};
runtime.Recycle(tag, std::move(old_texture.alloc));
}

View File

@@ -0,0 +1,161 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/vector_math.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, TaskScheduler& scheduler)
: scheduler{scheduler}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color;
layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset;
};
void main() {
ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy);
ivec2 tex_coord = src_offset + dst_coord;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uint value = stencil_val | (depth_val << 8);
imageStore(color, dst_coord, uvec4(value));
}
)";
compute_shader =
Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High);
const std::array compute_layout_bindings = {
vk::DescriptorSetLayoutBinding{.binding = 0,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute},
vk::DescriptorSetLayoutBinding{.binding = 2,
.descriptorType = vk::DescriptorType::eStorageImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute}};
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
.bindingCount = static_cast<u32>(compute_layout_bindings.size()),
.pBindings = compute_layout_bindings.data()};
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
const std::array update_template_entries = {
vk::DescriptorUpdateTemplateEntry{.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = 0,
.stride = sizeof(vk::DescriptorImageInfo)},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = sizeof(vk::DescriptorImageInfo),
.stride = 0},
vk::DescriptorUpdateTemplateEntry{.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage,
.offset = 2 * sizeof(vk::DescriptorImageInfo),
.stride = 0}};
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
.pDescriptorUpdateEntries = update_template_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_layout};
update_template = device.createDescriptorUpdateTemplate(template_info);
const vk::PushConstantRange push_range = {
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(Common::Vec2i),
};
const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = 1,
.pSetLayouts = &descriptor_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_range};
compute_pipeline_layout = device.createPipelineLayout(layout_info);
const vk::PipelineShaderStageCreateInfo compute_stage = {
.stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main"};
const vk::ComputePipelineCreateInfo compute_info = {.stage = compute_stage,
.layout = compute_pipeline_layout};
if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) {
compute_pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
UNREACHABLE();
}
}
BlitHelper::~BlitHelper() {
device.destroyPipeline(compute_pipeline);
device.destroyPipelineLayout(compute_pipeline_layout);
device.destroyDescriptorUpdateTemplate(update_template);
device.destroyDescriptorSetLayout(descriptor_layout);
device.destroyShaderModule(compute_shader);
}
void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels);
dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = {
vk::DescriptorImageInfo{.imageView = source.GetDepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = source.GetStencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal},
vk::DescriptorImageInfo{.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral}};
const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool =
scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_layout};
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
command_buffer.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
}
} // namespace Vulkan
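
The shader above packs each depth/stencil texel into a single 32-bit value, stencil in the low byte and the 24-bit depth above it. A rough sketch of unpacking such a value on the CPU (illustrative only; the surrounding buffer plumbing is omitted):

// value = stencil | (depth << 8), as written by the compute shader
const u32 packed = r32_texel;        // hypothetical packed sample read back from the R32 image
const u32 stencil = packed & 0xFFu;  // low 8 bits
const u32 depth24 = packed >> 8;     // normalized depth scaled by (2^24 - 1)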

View File

@@ -0,0 +1,39 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
struct TextureBlit;
}
namespace Vulkan {
class Instance;
class TaskScheduler;
class Surface;
class BlitHelper {
public:
BlitHelper(const Instance& instance, TaskScheduler& scheduler);
~BlitHelper();
/// Blits D24S8 pixel data to the provided buffer
void BlitD24S8ToR32(Surface& depth_surface, Surface& r32_surface,
const VideoCore::TextureBlit& blit);
private:
TaskScheduler& scheduler;
vk::Device device;
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
vk::DescriptorSet descriptor_set;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
};
} // namespace Vulkan

View File

@@ -124,10 +124,8 @@ D24S8toRGBA8::~D24S8toRGBA8() {
void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surface& dest,
VideoCore::Rect2D dst_rect) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, source.alloc, vk::ImageLayout::eDepthStencilReadOnlyOptimal,
0, source.alloc.levels);
runtime.Transition(command_buffer, dest.alloc, vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
source.Transition(vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, source.alloc.levels);
dest.Transition(vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = {
vk::DescriptorImageInfo{.imageView = source.GetDepthView(),
@@ -145,6 +143,8 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);

View File

@@ -0,0 +1,80 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class LayoutTracker {
static constexpr u32 LAYOUT_BITS = 3;
static constexpr u32 MAX_LAYOUTS = (1 << LAYOUT_BITS);
static constexpr u32 LAYOUT_MASK = MAX_LAYOUTS - 1;
// Build layout pattern masks at compile time for fast range equality checks
static constexpr auto LAYOUT_PATTERNS = []() {
std::array<u64, MAX_LAYOUTS> patterns{};
for (u32 layout = 0; layout < MAX_LAYOUTS; layout++) {
for (u32 i = 0; i < 16; i++) {
patterns[layout] <<= LAYOUT_BITS;
patterns[layout] |= layout;
}
}
return patterns;
}();
public:
LayoutTracker() = default;
/// Returns the image layout of the provided level
[[nodiscard]] constexpr vk::ImageLayout GetLayout(u32 level) const {
const u32 shift = level * LAYOUT_BITS;
return static_cast<vk::ImageLayout>((layouts >> shift) & LAYOUT_MASK);
}
/// Returns true if the level and layer range provided has the same layout
[[nodiscard]] constexpr bool IsRangeEqual(vk::ImageLayout layout, u32 level,
u32 level_count) const {
const u32 shift = level * LAYOUT_BITS;
const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1;
const u64 pattern = LAYOUT_PATTERNS[static_cast<u64>(layout)];
return ((layouts >> shift) & range_mask) == (pattern & range_mask);
}
/// Sets the image layout of the provided level
constexpr void SetLayout(vk::ImageLayout layout, u32 level, u32 level_count = 1) {
const u32 shift = level * LAYOUT_BITS;
const u64 range_mask = (1ull << level_count * LAYOUT_BITS) - 1;
const u64 pattern = LAYOUT_PATTERNS[static_cast<u64>(layout)];
layouts &= ~(range_mask << shift);
layouts |= (pattern & range_mask) << shift;
}
/// Calls func for each continuous layout range
template <typename T>
void ForEachLayoutRange(u32 level, u32 level_count, vk::ImageLayout new_layout, T&& func) {
u32 start_level = level;
u32 end_level = level + level_count;
auto current_layout = GetLayout(level);
while (level < end_level) {
level++;
const auto layout = GetLayout(level);
if (layout != current_layout || level == end_level) {
if (current_layout != new_layout) {
func(start_level, level - start_level, current_layout);
}
current_layout = layout;
start_level = level;
}
}
}
public:
u64 layouts{};
};
} // namespace Vulkan
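
For intuition, a small usage sketch of the packed tracker (this assumes the tracked vk::ImageLayout enum values fit in three bits, which holds for the core layouts used by the runtime):

LayoutTracker tracker{};                                  // every level starts as eUndefined (0)
tracker.SetLayout(vk::ImageLayout::eTransferDstOptimal, 2, 3);
// Levels 2..4 now report eTransferDstOptimal, the rest remain eUndefined:
// tracker.GetLayout(3) == vk::ImageLayout::eTransferDstOptimal
// tracker.IsRangeEqual(vk::ImageLayout::eTransferDstOptimal, 2, 3) -> true
// tracker.IsRangeEqual(vk::ImageLayout::eTransferDstOptimal, 1, 3) -> false
// LAYOUT_PATTERNS[5] == 0x0000B6DB6DB6DB6D (the 3-bit pattern 0b101 repeated 16 times)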

View File

@@ -105,12 +105,26 @@ constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {vk::Format::eR32G32Sfloat};
constexpr std::array TEXTURE_BUFFER_FORMATS = {vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32A32Sfloat};
constexpr VideoCore::SurfaceParams NULL_PARAMS = {.width = 1,
.height = 1,
.stride = 1,
.texture_type = VideoCore::TextureType::Texture2D,
.pixel_format = VideoCore::PixelFormat::RGBA8,
.type = VideoCore::SurfaceType::Color};
constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst;
constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage;
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
TaskScheduler& scheduler, TextureRuntime& runtime,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, runtime{runtime},
renderpass_cache{renderpass_cache}, res_cache{*this, runtime},
pipeline_cache{instance, scheduler, renderpass_cache},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Uint, NULL_STORAGE_USAGE, runtime},
vertex_buffer{
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
uniform_buffer{
@@ -122,8 +136,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
// Create a 1x1 clear texture to use in the NULL case,
CreateDefaultTextures();
null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1);
uniform_block_data.lighting_lut_dirty.fill(true);
@@ -156,12 +170,12 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1));
for (u32 i = 0; i < 4; i++) {
pipeline_cache.BindTexture(i, default_texture.image_view);
pipeline_cache.BindTexture(i, null_surface.GetImageView());
pipeline_cache.BindSampler(i, default_sampler);
}
for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, default_storage_texture.image_view);
pipeline_cache.BindStorageImage(i, null_storage_surface.GetImageView());
}
// Explicitly call the derived version to avoid warnings about calling virtual
@@ -173,7 +187,6 @@ RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass();
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice();
for (auto& [key, sampler] : samplers) {
@@ -184,10 +197,6 @@ RasterizerVulkan::~RasterizerVulkan() {
device.destroyFramebuffer(framebuffer);
}
vmaDestroyImage(allocator, default_texture.image, default_texture.allocation);
vmaDestroyImage(allocator, default_storage_texture.image, default_storage_texture.allocation);
device.destroyImageView(default_texture.image_view);
device.destroyImageView(default_storage_texture.image_view);
device.destroySampler(default_sampler);
}
@@ -672,9 +681,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const u32 binding = static_cast<u32>(face);
if (surface != nullptr) {
pipeline_cache.BindStorageImage(binding, surface->alloc.image_view);
pipeline_cache.BindStorageImage(binding, surface->GetImageView());
} else {
pipeline_cache.BindStorageImage(binding, default_storage_texture.image_view);
pipeline_cache.BindStorageImage(binding, null_storage_surface.GetImageView());
}
};
@@ -718,7 +727,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
if (surface != nullptr) {
pipeline_cache.BindStorageImage(0, surface->GetImageView());
} else {
pipeline_cache.BindStorageImage(0, default_storage_texture.image_view);
pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView());
}
continue;
}
@@ -748,12 +757,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureCube(config);
if (surface != nullptr) {
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels, 0, 6);
pipeline_cache.BindTexture(3, surface->alloc.image_view);
surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels);
pipeline_cache.BindTexture(3, surface->GetImageView());
} else {
pipeline_cache.BindTexture(3, default_texture.image_view);
pipeline_cache.BindTexture(3, null_surface.GetImageView());
}
BindSampler(3, texture_cube_sampler, texture.config);
@@ -769,9 +777,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
runtime.Transition(scheduler.GetRenderCommandBuffer(), surface->alloc,
vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels);
surface->Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0,
surface->alloc.levels);
CheckBarrier(surface->alloc.image_view, texture_index);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
@@ -781,10 +788,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black.
pipeline_cache.BindTexture(texture_index, default_texture.image_view);
pipeline_cache.BindTexture(texture_index, null_surface.GetImageView());
}
} else {
pipeline_cache.BindTexture(texture_index, default_texture.image_view);
pipeline_cache.BindTexture(texture_index, null_surface.GetImageView());
pipeline_cache.BindSampler(texture_index, default_sampler);
}
}
@@ -830,17 +837,12 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
it->second = CreateFramebuffer(framebuffer_info);
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (color_surface) {
runtime.Transition(command_buffer, color_surface->alloc,
vk::ImageLayout::eColorAttachmentOptimal, 0,
color_surface->alloc.levels);
color_surface->Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
}
if (depth_surface) {
runtime.Transition(command_buffer, depth_surface->alloc,
vk::ImageLayout::eDepthStencilAttachmentOptimal, 0,
depth_surface->alloc.levels);
depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
const vk::RenderPassBeginInfo renderpass_begin = {
@@ -866,6 +868,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
@@ -877,12 +881,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
vertex_buffer.Commit(vertex_size);
// Bind the vertex buffer at the current mapped offset. This effectively means
// that when base_vertex is zero the GPU will start drawing from the current mapped
// offset not the start of the buffer.
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
command_buffer.draw(vertices, 1, base_vertex, 0);
}
}
@@ -1633,17 +1632,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
return device.createFramebuffer(framebuffer_info);
}
void RasterizerVulkan::CreateDefaultTextures() {
const vk::ImageUsageFlags usage = GetImageUsage(vk::ImageAspectFlagBits::eColor);
default_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Unorm, usage, {});
default_storage_texture = runtime.Allocate(1, 1, 1, 1, vk::Format::eR8G8B8A8Uint, usage, {});
const vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, default_texture, vk::ImageLayout::eShaderReadOnlyOptimal, 0,
1);
runtime.Transition(command_buffer, default_storage_texture, vk::ImageLayout::eGeneral, 0, 1);
}
void RasterizerVulkan::FlushBuffers() {
vertex_buffer.Flush();
uniform_buffer.Flush();

View File

@@ -249,8 +249,6 @@ private:
/// Creates a new Vulkan framebuffer object
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);
void CreateDefaultTextures();
private:
const Instance& instance;
TaskScheduler& scheduler;
@@ -280,8 +278,8 @@ private:
std::vector<HardwareVertex> vertex_batch;
std::array<u64, 16> binding_offsets{};
vk::Sampler default_sampler;
ImageAlloc default_texture;
ImageAlloc default_storage_texture;
Surface null_surface;
Surface null_storage_surface;
struct {
Pica::Shader::UniformData data{};

View File

@@ -35,7 +35,9 @@ constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{
instance,
scheduler} {
for (auto& buffer : staging_buffers) {
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
@@ -77,7 +79,10 @@ TextureRuntime::~TextureRuntime() {
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const u32 current_slot = scheduler.GetCurrentSlotIndex();
const u32 offset = staging_offsets[current_slot];
u32& offset = staging_offsets[current_slot];
// Depth uploads require 4 byte alignment; it doesn't hurt to do it for everyone
offset = Common::AlignUp(offset, 4);
if (offset + size > STAGING_BUFFER_SIZE) {
LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
UNREACHABLE();
@@ -100,19 +105,6 @@ void TextureRuntime::OnSlotSwitch(u32 new_slot) {
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
const u32 levels = std::bit_width(std::max(width, height));
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const VideoCore::HostTextureTag key = {
.format = format, .width = width, .height = height, .layers = layers};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
const FormatTraits traits = instance.GetTraits(format);
const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
@@ -121,23 +113,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
const vk::ImageUsageFlags vk_usage = is_suitable ? traits.usage : GetImageUsage(aspect);
return Allocate(width, height, type, vk_format, vk_usage);
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::TextureType type,
vk::Format format, vk::ImageUsageFlags usage) {
ImageAlloc alloc{};
alloc.format = format;
alloc.levels = std::bit_width(std::max(width, height));
alloc.layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
alloc.aspect = GetImageAspect(format);
const HostTextureTag key = {.format = format, .type = type, .width = width, .height = height};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
const vk::ImageCreateFlags flags = type == VideoCore::TextureType::CubeMap
? vk::ImageCreateFlagBits::eCubeCompatible
: vk::ImageCreateFlags{};
return Allocate(width, height, layers, levels, vk_format, vk_usage, flags);
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 levels,
vk::Format format, vk::ImageUsageFlags usage,
vk::ImageCreateFlags flags) {
const vk::ImageAspectFlags aspect = GetImageAspect(format);
const vk::ImageCreateInfo image_info = {.flags = flags,
.imageType = vk::ImageType::e2D,
.format = format,
.extent = {width, height, 1},
.mipLevels = levels,
.arrayLayers = layers,
.mipLevels = alloc.levels,
.arrayLayers = alloc.layers,
.samples = vk::SampleCountFlagBits::e1,
.usage = usage};
@@ -145,79 +150,66 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 layers, u32 level
VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocation allocation;
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &allocation, nullptr);
&unsafe_image, &alloc.allocation, nullptr);
if (result != VK_SUCCESS) [[unlikely]] {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
UNREACHABLE();
}
const vk::ImageViewType view_type = flags & vk::ImageCreateFlagBits::eCubeCompatible
? vk::ImageViewType::eCube
: vk::ImageViewType::e2D;
const vk::ImageViewType view_type =
type == VideoCore::TextureType::CubeMap ? vk::ImageViewType::eCube : vk::ImageViewType::e2D;
vk::Image image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {.image = image,
alloc.image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {.image = alloc.image,
.viewType = view_type,
.format = format,
.subresourceRange = {.aspectMask = aspect,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = 0,
.levelCount = levels,
.levelCount = alloc.levels,
.baseArrayLayer = 0,
.layerCount = layers}};
.layerCount = alloc.layers}};
vk::Device device = instance.GetDevice();
vk::ImageView image_view = device.createImageView(view_info);
alloc.image_view = device.createImageView(view_info);
// Also create a base mip view in case this is used as an attachment
vk::ImageView base_view;
if (levels > 1) [[likely]] {
const vk::ImageViewCreateInfo base_view_info = {.image = image,
.viewType = view_type,
.format = format,
.subresourceRange = {.aspectMask = aspect,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = layers}};
if (alloc.levels > 1) [[likely]] {
const vk::ImageViewCreateInfo base_view_info = {
.image = alloc.image,
.viewType = view_type,
.format = format,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = alloc.layers}};
base_view = device.createImageView(base_view_info);
alloc.base_view = device.createImageView(base_view_info);
}
// Create separate depth/stencil views in case this gets reinterpreted with a compute shader
vk::ImageView depth_view;
vk::ImageView stencil_view;
if (aspect & vk::ImageAspectFlagBits::eStencil) {
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
vk::ImageViewCreateInfo view_info = {
.image = image,
.image = alloc.image,
.viewType = view_type,
.format = format,
.subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eDepth,
.baseMipLevel = 0,
.levelCount = levels,
.levelCount = alloc.levels,
.baseArrayLayer = 0,
.layerCount = layers}};
.layerCount = alloc.layers}};
depth_view = device.createImageView(view_info);
alloc.depth_view = device.createImageView(view_info);
view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil;
stencil_view = device.createImageView(view_info);
alloc.stencil_view = device.createImageView(view_info);
}
return ImageAlloc{.image = image,
.image_view = image_view,
.base_view = base_view,
.depth_view = depth_view,
.stencil_view = stencil_view,
.allocation = allocation,
.format = format,
.aspect = aspect,
.levels = levels,
.layers = layers};
return alloc;
}
void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) {
void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) {
texture_recycler.emplace(tag, std::move(alloc));
}
@@ -239,6 +231,8 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
}
} else {
switch (surface.pixel_format) {
case VideoCore::PixelFormat::RGBA8:
return Pica::Texture::ConvertABGRToRGBA(source, dest);
case VideoCore::PixelFormat::RGBA4:
return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
case VideoCore::PixelFormat::RGB8:
@@ -258,10 +252,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
const vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
surface.alloc.levels, 0,
surface.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
surface.Transition(vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);
vk::ClearValue clear_value{};
if (aspect & vk::ImageAspectFlagBits::eColor) {
@@ -282,6 +273,7 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
.baseArrayLayer = 0,
.layerCount = 1};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (aspect & vk::ImageAspectFlagBits::eColor) {
command_buffer.clearColorImage(surface.alloc.image,
vk::ImageLayout::eTransferDstOptimal, clear_value.color,
@@ -295,24 +287,22 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
} else {
// For partial clears we begin a clear renderpass with the appropriate render area
vk::RenderPass clear_renderpass{};
ImageAlloc& alloc = surface.alloc;
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_renderpass = renderpass_cache.GetRenderpass(
surface.pixel_format, VideoCore::PixelFormat::Invalid, true);
Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0,
alloc.levels);
surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
} else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) {
clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true);
Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0,
alloc.levels);
surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
const vk::ImageView framebuffer_view = surface.GetFramebufferView();
auto [it, new_framebuffer] =
clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{});
clear_framebuffers.try_emplace(framebuffer_view, vk::Framebuffer{});
if (new_framebuffer) {
const vk::ImageView framebuffer_view = surface.GetFramebufferView();
const vk::FramebufferCreateInfo framebuffer_info = {.renderPass = clear_renderpass,
.attachmentCount = 1,
.pAttachments = &framebuffer_view,
@@ -345,6 +335,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) {
renderpass_cache.ExitRenderpass();
source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
const vk::ImageCopy image_copy = {
.srcSubresource = {.aspectMask = ToVkAspect(source.type),
.mipLevel = copy.src_level,
@@ -359,11 +352,6 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.extent = {copy.extent.width, copy.extent.height, 1}};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0,
source.alloc.levels);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
dest.alloc.levels);
command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
@@ -374,12 +362,8 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0,
source.alloc.levels, 0,
source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0,
dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left),
static_cast<s32>(blit.src_rect.bottom), 0},
@@ -402,6 +386,7 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
.layerCount = 1},
.dstOffsets = dest_offsets};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area,
vk::Filter::eNearest);
@@ -420,8 +405,8 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1);
surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}};
@@ -461,9 +446,9 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
}
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer,
u32 layer_count) {
if (new_layout == alloc.layout || !alloc.image) {
vk::ImageLayout new_layout, u32 level, u32 level_count) {
LayoutTracker& tracker = alloc.tracker;
if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) {
return;
}
@@ -540,28 +525,33 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
return info;
};
LayoutInfo source = GetLayoutInfo(alloc.layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
tracker.ForEachLayoutRange(
level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) {
LayoutInfo source = GetLayoutInfo(old_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = start,
.levelCount = count,
.baseArrayLayer = 0,
.layerCount = alloc.layers}};
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = alloc.layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = /*level*/ 0,
.levelCount = /*level_count*/ alloc.levels,
.baseArrayLayer = layer,
.layerCount = layer_count}};
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
});
command_buffer.pipelineBarrier(source.stage, dest.stage, vk::DependencyFlagBits::eByRegion, {},
{}, barrier);
alloc.layout = new_layout;
tracker.SetLayout(new_layout, level, level_count);
for (u32 i = 0; i < level_count; i++) {
ASSERT(alloc.tracker.GetLayout(level + i) == new_layout);
}
}
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
Surface::Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} {
@@ -571,36 +561,52 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
}
}
Surface::Surface(const VideoCore::SurfaceParams& params, vk::Format format,
vk::ImageUsageFlags usage, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()} {
if (format != vk::Format::eUndefined) {
alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), texture_type, format, usage);
}
}
Surface::~Surface() {
if (pixel_format != VideoCore::PixelFormat::Invalid) {
const VideoCore::HostTextureTag tag = {
.format = pixel_format,
.width = GetScaledWidth(),
.height = GetScaledHeight(),
.layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u};
const HostTextureTag tag = {.format = alloc.format,
.type = texture_type,
.width = GetScaledWidth(),
.height = GetScaledHeight()};
runtime.Recycle(tag, std::move(alloc));
}
}
void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, alloc, new_layout, level, level_count);
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload);
if (type == VideoCore::SurfaceType::DepthStencil) {
LOG_ERROR(Render_Vulkan, "Depth upload unimplemented, ignoring");
return;
}
runtime.renderpass_cache.ExitRenderpass();
const bool is_scaled = res_scale != 1;
if (is_scaled) {
LOG_ERROR(Render_Vulkan, "Unimplemented scaled upload!");
ScaledUpload(upload);
ScaledUpload(upload, staging);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferOffset = staging.buffer_offset + upload.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = alloc.aspect,
@@ -623,9 +629,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
}
}
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0,
alloc.levels, 0,
texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyBufferToImage(staging.buffer, alloc.image,
vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data());
@@ -644,17 +650,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
runtime.renderpass_cache.ExitRenderpass();
// For depth stencil downloads always use the compute shader fallback
// to avoid having to interleave the data later. These should(?) be
// uncommon anyway and the perf hit is very small
if (type == VideoCore::SurfaceType::DepthStencil) {
return DepthStencilDownload(download, staging);
}
const bool is_scaled = res_scale != 1;
if (is_scaled) {
LOG_ERROR(Render_Vulkan, "Unimplemented scaled download!");
ScaledDownload(download);
ScaledDownload(download, staging);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = download.texture_rect;
vk::BufferImageCopy copy_region = {
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
@@ -665,25 +673,12 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += 4 * staging.size / 5;
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, 0,
alloc.levels);
Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
// Copy pixel data to the staging buffer
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data());
staging.buffer, copy_region);
}
// Lock this data until the next scheduler switch
@@ -692,66 +687,127 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
}
u32 Surface::GetInternalBytesPerPixel() const {
// Request 5 bytes for D24S8 as well because we can use the
// extra space when deinterleaving the data during upload
if (alloc.format == vk::Format::eD24UnormS8Uint) {
return 5;
}
return vk::blockSize(alloc.format);
}
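// To make the five-byte request concrete, a worked illustration (not code from this change):
// the staging space for a 256x256 D24S8 surface holds the depth and stencil planes side by side.
// constexpr u32 staging_size = 256 * 256 * 5; // 327680 bytes requested via GetInternalBytesPerPixel
// constexpr u32 depth_bytes  = 256 * 256 * 4; // 262144 bytes: the 4-byte depth plane, 4/5 of the buffer
// constexpr u32 stencil_off  = depth_bytes;   // the remaining 1/5 (65536 bytes) holds the 1-byte stencil plane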
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type,
texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
}
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
/*const u32 rect_width = upload.texture_rect.GetWidth();
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type,
texture);
// If filtering fails, resort to normal blitting
glBlitFramebuffer(0, 0, rect_width, rect_height,
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.right, upload.texture_rect.top,
MakeBufferMask(type), GL_LINEAR);
}*/
SurfaceParams unscaled_params = *this;
unscaled_params.width = rect_width;
unscaled_params.stride = rect_width;
unscaled_params.height = rect_height;
unscaled_params.res_scale = 1;
Surface unscaled_surface{unscaled_params, runtime};
const VideoCore::BufferTextureCopy unscaled_upload = {.buffer_offset = upload.buffer_offset,
.buffer_size = upload.buffer_size,
.texture_rect = unscaled_rect};
unscaled_surface.Upload(unscaled_upload, staging);
const VideoCore::TextureBlit blit = {.src_level = 0,
.dst_level = upload.texture_level,
.src_layer = 0,
.dst_layer = 0,
.src_rect = unscaled_rect,
.dst_rect = scaled_rect};
runtime.BlitTextures(unscaled_surface, *this, blit);
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download,
const StagingData& staging) {
const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
const VideoCore::Rect2D unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
SurfaceParams unscaled_params = *this;
unscaled_params.width = rect_width;
unscaled_params.stride = rect_width;
unscaled_params.height = rect_height;
unscaled_params.res_scale = 1;
Surface unscaled_surface{unscaled_params, runtime};
const VideoCore::TextureBlit blit = {.src_level = download.texture_level,
.dst_level = 0,
.src_layer = 0,
.dst_layer = 0,
.src_rect = scaled_rect,
.dst_rect = unscaled_rect};
// Blit the scaled rectangle to the unscaled texture
runtime.BlitTextures(*this, unscaled_surface, blit);
const VideoCore::BufferTextureCopy unscaled_download = {.buffer_offset = download.buffer_offset,
.buffer_size = download.buffer_size,
.texture_rect = unscaled_rect,
.texture_level = 0};
unscaled_surface.Download(unscaled_download, staging);
}
void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
const StagingData& staging) {
const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
const VideoCore::Rect2D unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const VideoCore::Rect2D r32_scaled_rect =
VideoCore::Rect2D{0, scaled_rect.GetHeight(), scaled_rect.GetWidth(), 0};
// For depth downloads, create an R32UI surface and use a compute shader to convert.
// Then we blit and download that surface
SurfaceParams r32_params = *this;
r32_params.width = scaled_rect.GetWidth();
r32_params.stride = scaled_rect.GetWidth();
r32_params.height = scaled_rect.GetHeight();
r32_params.type = VideoCore::SurfaceType::Color;
r32_params.res_scale = 1;
Surface r32_surface{r32_params, vk::Format::eR32Uint,
vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eStorage,
runtime};
const VideoCore::TextureBlit blit = {.src_level = download.texture_level,
.dst_level = 0,
.src_layer = 0,
.dst_layer = 0,
.src_rect = scaled_rect,
.dst_rect = r32_scaled_rect};
runtime.blit_helper.BlitD24S8ToR32(*this, r32_surface, blit);
// Blit the upper mip level to the lower one to scale without additional allocations
const bool is_scaled = res_scale != 1;
if (is_scaled) {
const VideoCore::TextureBlit r32_blit = {.src_level = 0,
.dst_level = 1,
.src_layer = 0,
.dst_layer = 0,
.src_rect = r32_scaled_rect,
.dst_rect = unscaled_rect};
runtime.BlitTextures(r32_surface, r32_surface, r32_blit);
}
const VideoCore::BufferTextureCopy r32_download = {.buffer_offset = download.buffer_offset,
.buffer_size = download.buffer_size,
.texture_rect = unscaled_rect,
.texture_level = is_scaled ? 1u : 0u};
r32_surface.Download(r32_download, staging);
}
} // namespace Vulkan

View File

@@ -9,8 +9,10 @@
#include <vulkan/vulkan_hash.hpp>
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
@@ -24,6 +26,14 @@ struct StagingData {
};
struct ImageAlloc {
ImageAlloc() = default;
ImageAlloc(const ImageAlloc&) = delete;
ImageAlloc& operator=(const ImageAlloc&) = delete;
ImageAlloc(ImageAlloc&&) = default;
ImageAlloc& operator=(ImageAlloc&&) = default;
vk::Image image;
vk::ImageView image_view;
vk::ImageView base_view;
@@ -32,12 +42,38 @@ struct ImageAlloc {
VmaAllocation allocation;
vk::ImageUsageFlags usage;
vk::Format format;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor;
u32 levels = 1;
u32 layers = 1;
LayoutTracker tracker;
};
struct HostTextureTag {
vk::Format format = vk::Format::eUndefined;
VideoCore::TextureType type = VideoCore::TextureType::Texture2D;
u32 width = 1;
u32 height = 1;
auto operator<=>(const HostTextureTag&) const noexcept = default;
const u64 Hash() const {
return Common::ComputeHash64(this, sizeof(HostTextureTag));
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::HostTextureTag> {
std::size_t operator()(const Vulkan::HostTextureTag& tag) const noexcept {
return tag.Hash();
}
};
} // namespace std
namespace Vulkan {
class Instance;
class RenderpassCache;
class Surface;
@@ -62,15 +98,14 @@ public:
VideoCore::TextureType type);
/// Allocates a vulkan image
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, u32 layers, u32 levels,
vk::Format format, vk::ImageUsageFlags usage,
vk::ImageCreateFlags flags);
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::TextureType type,
vk::Format format, vk::ImageUsageFlags usage);
/// Causes a GPU command flush
void Finish();
/// Takes back ownership of the allocation for recycling
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);
void Recycle(const HostTextureTag tag, ImageAlloc&& alloc);
/// Performs required format conversions on the staging data
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
@@ -78,7 +113,7 @@ public:
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout,
u32 level, u32 level_count, u32 layer = 0, u32 layer_count = 1);
u32 level, u32 level_count);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
@@ -118,10 +153,11 @@ private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
BlitHelper blit_helper;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
};
@@ -130,9 +166,14 @@ class Surface : public VideoCore::SurfaceBase<Surface> {
friend class RasterizerVulkan;
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime);
Surface(const VideoCore::SurfaceParams& params, vk::Format format, vk::ImageUsageFlags usage,
TextureRuntime& runtime);
~Surface() override;
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
@@ -168,14 +209,15 @@ public:
}
private:
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
/// Uploads pixel data to scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload);
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
/// Overrides the image layout of the mip level range
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& stagings);
/// Downloads scaled depth stencil data
void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
const StagingData& staging);
private:
TextureRuntime& runtime;
@@ -183,7 +225,7 @@ private:
TaskScheduler& scheduler;
public:
ImageAlloc alloc{};
ImageAlloc alloc;
FormatTraits traits;
};