renderer_vulkan: Add experimental Vulkan renderer

* Still extremely WIP and missing the rasterizer/renderer classes
Author: emufan4568
Date: 2022-09-16 17:19:09 +03:00
Committed by: GPUCode
Parent: e1542cea84
Commit: c85731f3ae
35 changed files with 6096 additions and 14 deletions

.gitmodules vendored
View File

@@ -61,3 +61,6 @@
[submodule "vulkan-headers"]
path = externals/vulkan-headers
url = https://github.com/KhronosGroup/Vulkan-Headers
[submodule "glslang"]
path = externals/glslang
url = https://github.com/KhronosGroup/glslang

View File

@@ -60,6 +60,9 @@ endif()
# Glad
add_subdirectory(glad)
# glslang
add_subdirectory(glslang)
# inih
add_subdirectory(inih)

externals/glslang vendored Submodule

Submodule externals/glslang added at c0cf8ad876

View File

@@ -299,6 +299,40 @@ public:
}
};
static Frontend::WindowSystemType GetWindowSystemType() {
// Determine WSI type based on Qt platform.
QString platform_name = QGuiApplication::platformName();
if (platform_name == QStringLiteral("windows"))
return Frontend::WindowSystemType::Windows;
else if (platform_name == QStringLiteral("xcb"))
return Frontend::WindowSystemType::X11;
else if (platform_name == QStringLiteral("wayland"))
return Frontend::WindowSystemType::Wayland;
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
return Frontend::WindowSystemType::Windows;
}
static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
Frontend::EmuWindow::WindowSystemInfo wsi;
wsi.type = GetWindowSystemType();
// Our Win32 Qt external doesn't have the private API.
#if defined(WIN32) || defined(__APPLE__)
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#else
QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
wsi.display_connection = pni->nativeResourceForWindow("display", window);
if (wsi.type == Frontend::WindowSystemType::Wayland)
wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
else
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#endif
wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
return wsi;
}
GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread)
: QWidget(parent_), emu_thread(emu_thread) {
@@ -532,6 +566,9 @@ bool GRenderWindow::InitRenderTarget() {
break;
}
// Update the Window System information with the new render target
window_info = GetWindowSystemInfo(child_widget->windowHandle());
child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight);
layout()->addWidget(child_widget);

View File

@@ -6,6 +6,7 @@
#include <cstddef>
#include <cstring>
#include <concepts>
#include "common/cityhash.h"
#include "common/common_types.h"
@@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) {
return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
template <std::integral T>
struct IdentityHash {
T operator()(const T& value) const {
return value;
}
};
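A quick illustrative sketch (not part of the commit): IdentityHash is useful when map keys are already hashes, e.g. values produced by Common::ComputeHash64, so std::unordered_map can skip rehashing them. The alias below is hypothetical.

#include <unordered_map>
#include "common/hash.h"

// Hypothetical: the key is already a well-distributed u64 hash, so the
// hasher simply returns it unchanged instead of hashing the hash again.
using HashKeyedCache = std::unordered_map<u64, int, Common::IdentityHash<u64>>;
HashKeyedCache cache;
// cache.emplace(Common::ComputeHash64(data, len), value);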
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
template <typename T>
struct HashableStruct {

View File

@@ -8,6 +8,7 @@
#include <array>
#include "common/common_types.h"
#include "common/logging/formatter.h"
namespace Log {
// trims up to and including the last of ../, ..\, src/, src\ in a string
@@ -102,6 +103,7 @@ enum class Class : ClassType {
Render, ///< Emulator video output and hardware acceleration
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE and LLE implementations of the DSP
Audio_Sink, ///< Emulator audio output backend

View File

@@ -12,6 +12,15 @@
namespace Frontend {
/// Information for the graphics backends signifying what type of screen pointer is in
/// WindowSystemInfo
enum class WindowSystemType {
Headless,
Windows,
X11,
Wayland,
};
struct Frame;
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
@@ -117,6 +126,23 @@ public:
std::pair<unsigned, unsigned> min_client_area_size;
};
/// Data describing host window system information
struct WindowSystemInfo {
// Window system type. Determines which GL context or Vulkan WSI is used.
WindowSystemType type = WindowSystemType::Headless;
// Connection to a display server. This is used on X11 and Wayland platforms.
void* display_connection = nullptr;
// Render surface. This is a pointer to the native window handle, which depends
// on the platform. e.g. HWND for Windows, Window for X11. If the surface is
// set to nullptr, the video backend will run in headless mode.
void* render_surface = nullptr;
// Scale of the render surface. For hidpi systems, this will be >1.
float render_surface_scale = 1.0f;
};
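To illustrate how a Vulkan backend might consume this struct, here is a hedged sketch of surface creation (the commit's real implementation lives in vk_platform.h, which is not shown in this diff; assumes VK_USE_PLATFORM_WIN32_KHR / VK_USE_PLATFORM_XLIB_KHR and the VULKAN_HPP_NO_CONSTRUCTORS designated-initializer style used elsewhere in the commit):

// Hypothetical sketch: translate WindowSystemInfo into a vk::SurfaceKHR.
vk::SurfaceKHR CreateSurfaceSketch(vk::Instance instance,
                                   const Frontend::EmuWindow::WindowSystemInfo& wsi) {
#if defined(_WIN32)
    const vk::Win32SurfaceCreateInfoKHR info = {
        .hwnd = static_cast<HWND>(wsi.render_surface)
    };
    return instance.createWin32SurfaceKHR(info);
#else
    // X11 path; a Wayland build would use vk::WaylandSurfaceCreateInfoKHR
    // with display_connection/render_surface instead.
    const vk::XlibSurfaceCreateInfoKHR info = {
        .dpy = static_cast<Display*>(wsi.display_connection),
        .window = reinterpret_cast<Window>(wsi.render_surface)
    };
    return instance.createXlibSurfaceKHR(info);
#endif
}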
/// Polls window events
virtual void PollEvents() = 0;
@@ -180,6 +206,13 @@ public:
config = val;
}
/**
* Returns system information about the drawing area.
*/
const WindowSystemInfo& GetWindowInfo() const {
return window_info;
}
/**
* Gets the framebuffer layout (width, height, and screen regions)
* @note This method is thread-safe
@@ -226,6 +259,8 @@ protected:
framebuffer_layout = layout;
}
WindowSystemInfo window_info;
private:
/**
* Handler called when the minimal client area was requested to be changed via SetConfig.

View File

@@ -169,7 +169,7 @@ struct Values {
u64 init_time;
// Renderer
GraphicsAPI graphics_api = GraphicsAPI::OpenGL;
GraphicsAPI graphics_api = GraphicsAPI::Vulkan;
bool use_hw_renderer;
bool use_hw_shader;
bool separable_shader;

View File

@@ -82,9 +82,32 @@ add_library(video_core STATIC
# Temporary: move these back into alphabetical order before merging
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_vulkan/pica_to_vk.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader.cpp
renderer_vulkan/vk_shader.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_task_scheduler.cpp
renderer_vulkan/vk_task_scheduler.h
renderer_vulkan/vk_texture_runtime.cpp
renderer_vulkan/vk_texture_runtime.h
shader/debug_data.h
shader/shader.cpp
shader/shader.h
shader/shader_cache.h
shader/shader_interpreter.cpp
shader/shader_interpreter.h
swrasterizer/clipper.cpp
@@ -156,8 +179,11 @@ endif()
create_target_directory_groups(video_core)
# Include Vulkan headers
target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include)
target_include_directories(video_core PRIVATE ../../externals/vma)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization)
target_link_libraries(video_core PRIVATE glad glslang nihstro-headers Boost::serialization)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if (ARCHITECTURE_x86_64)

View File

@@ -6,8 +6,7 @@
#include <array>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
@@ -18,7 +17,7 @@ struct RasterizerRegs {
KeepAll = 0,
KeepClockWise = 1,
KeepCounterClockWise = 2,
// TODO: What does the third value imply?
KeepAll2 = 3
};
union {

View File

@@ -243,17 +243,12 @@ private:
};
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
const std::string_view version{reinterpret_cast<const char*>(glGetString(GL_VERSION))};
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}

View File

@@ -302,9 +302,9 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
}
MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
MICROPROFILE_SCOPE(OpenGL_Upload);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
@@ -339,9 +339,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
MICROPROFILE_SCOPE(OpenGL_Download);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);

View File

@@ -0,0 +1,278 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace PicaToVK {
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
struct FilterInfo {
vk::Filter mag_filter, min_filter;
vk::SamplerMipmapMode mip_mode;
};
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
constexpr std::array filter_table = {
vk::Filter::eNearest,
vk::Filter::eLinear
};
constexpr std::array mipmap_table = {
vk::SamplerMipmapMode::eNearest,
vk::SamplerMipmapMode::eLinear
};
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
}
inline vk::Filter TextureFilterMode(TextureFilter mode) {
switch (mode) {
case TextureFilter::Linear:
return vk::Filter::eLinear;
case TextureFilter::Nearest:
return vk::Filter::eNearest;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode);
UNIMPLEMENTED();
}
return vk::Filter::eLinear;
}
inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) {
switch (mip) {
case TextureFilter::Linear:
return vk::SamplerMipmapMode::eLinear;
case TextureFilter::Nearest:
return vk::SamplerMipmapMode::eNearest;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip);
UNIMPLEMENTED();
}
return vk::SamplerMipmapMode::eLinear;
}
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
vk::SamplerAddressMode::eRepeat,
vk::SamplerAddressMode::eMirroredRepeat,
// TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
// comments in enum WrapMode.
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
vk::SamplerAddressMode::eRepeat,
vk::SamplerAddressMode::eRepeat,
}};
const auto index = static_cast<std::size_t>(mode);
// Range check table for input
if (index >= wrap_mode_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", index);
UNREACHABLE();
return vk::SamplerAddressMode::eClampToEdge;
}
if (index > 3) {
Core::System::GetInstance().TelemetrySession().AddField(
Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode",
static_cast<u32>(index));
LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index);
}
return wrap_mode_table[index];
}
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
vk::BlendOp::eAdd,
vk::BlendOp::eSubtract,
vk::BlendOp::eReverseSubtract,
vk::BlendOp::eMin,
vk::BlendOp::eMax,
}};
const auto index = static_cast<std::size_t>(equation);
// Range check table for input
if (index >= blend_equation_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", index);
// This return value is hwtested, not just a stub
return vk::BlendOp::eAdd;
}
return blend_equation_table[index];
}
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
vk::BlendFactor::eZero, // BlendFactor::Zero
vk::BlendFactor::eOne, // BlendFactor::One
vk::BlendFactor::eSrcColor, // BlendFactor::SourceColor
vk::BlendFactor::eOneMinusSrcColor, // BlendFactor::OneMinusSourceColor
vk::BlendFactor::eDstColor, // BlendFactor::DestColor
vk::BlendFactor::eOneMinusDstColor, // BlendFactor::OneMinusDestColor
vk::BlendFactor::eSrcAlpha, // BlendFactor::SourceAlpha
vk::BlendFactor::eOneMinusSrcAlpha, // BlendFactor::OneMinusSourceAlpha
vk::BlendFactor::eDstAlpha, // BlendFactor::DestAlpha
vk::BlendFactor::eOneMinusDstAlpha, // BlendFactor::OneMinusDestAlpha
vk::BlendFactor::eConstantColor, // BlendFactor::ConstantColor
vk::BlendFactor::eOneMinusConstantColor,// BlendFactor::OneMinusConstantColor
vk::BlendFactor::eConstantAlpha, // BlendFactor::ConstantAlpha
vk::BlendFactor::eOneMinusConstantAlpha,// BlendFactor::OneMinusConstantAlpha
vk::BlendFactor::eSrcAlphaSaturate, // BlendFactor::SourceAlphaSaturate
}};
const auto index = static_cast<std::size_t>(factor);
// Range check table for input
if (index >= blend_func_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", index);
UNREACHABLE();
return vk::BlendFactor::eOne;
}
return blend_func_table[index];
}
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
vk::LogicOp::eClear, // Clear
vk::LogicOp::eAnd, // And
vk::LogicOp::eAndReverse, // AndReverse
vk::LogicOp::eCopy, // Copy
vk::LogicOp::eSet, // Set
vk::LogicOp::eCopyInverted, // CopyInverted
vk::LogicOp::eNoOp, // NoOp
vk::LogicOp::eInvert, // Invert
vk::LogicOp::eNand, // Nand
vk::LogicOp::eOr, // Or
vk::LogicOp::eNor, // Nor
vk::LogicOp::eXor, // Xor
vk::LogicOp::eEquivalent, // Equiv
vk::LogicOp::eAndInverted, // AndInverted
vk::LogicOp::eOrReverse, // OrReverse
vk::LogicOp::eOrInverted, // OrInverted
}};
const auto index = static_cast<std::size_t>(op);
// Range check table for input
if (index >= logic_op_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", index);
UNREACHABLE();
return vk::LogicOp::eCopy;
}
return logic_op_table[index];
}
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
vk::CompareOp::eNever, // CompareFunc::Never
vk::CompareOp::eAlways, // CompareFunc::Always
vk::CompareOp::eEqual, // CompareFunc::Equal
vk::CompareOp::eNotEqual, // CompareFunc::NotEqual
vk::CompareOp::eLess, // CompareFunc::LessThan
vk::CompareOp::eLessOrEqual, // CompareFunc::LessThanOrEqual
vk::CompareOp::eGreater, // CompareFunc::GreaterThan
vk::CompareOp::eGreaterOrEqual, // CompareFunc::GreaterThanOrEqual
}};
const auto index = static_cast<std::size_t>(func);
// Range check table for input
if (index >= compare_func_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", index);
UNREACHABLE();
return vk::CompareOp::eAlways;
}
return compare_func_table[index];
}
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
vk::StencilOp::eKeep, // StencilAction::Keep
vk::StencilOp::eZero, // StencilAction::Zero
vk::StencilOp::eReplace, // StencilAction::Replace
vk::StencilOp::eIncrementAndClamp, // StencilAction::Increment
vk::StencilOp::eDecrementAndClamp, // StencilAction::Decrement
vk::StencilOp::eInvert, // StencilAction::Invert
vk::StencilOp::eIncrementAndWrap, // StencilAction::IncrementWrap
vk::StencilOp::eDecrementAndWrap, // StencilAction::DecrementWrap
}};
const auto index = static_cast<std::size_t>(action);
// Range check table for input
if (index >= stencil_op_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", index);
UNREACHABLE();
return vk::StencilOp::eKeep;
}
return stencil_op_table[index];
}
inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) {
switch (topology) {
case Pica::PipelineRegs::TriangleTopology::Fan:
return vk::PrimitiveTopology::eTriangleFan;
case Pica::PipelineRegs::TriangleTopology::List:
case Pica::PipelineRegs::TriangleTopology::Shader:
return vk::PrimitiveTopology::eTriangleList;
case Pica::PipelineRegs::TriangleTopology::Strip:
return vk::PrimitiveTopology::eTriangleStrip;
}
}
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
switch (mode) {
case Pica::RasterizerRegs::CullMode::KeepAll:
case Pica::RasterizerRegs::CullMode::KeepAll2:
return vk::CullModeFlagBits::eNone;
case Pica::RasterizerRegs::CullMode::KeepClockWise:
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::CullModeFlagBits::eBack;
}
}
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
switch (mode) {
case Pica::RasterizerRegs::CullMode::KeepAll:
case Pica::RasterizerRegs::CullMode::KeepAll2:
case Pica::RasterizerRegs::CullMode::KeepClockWise:
return vk::FrontFace::eCounterClockwise;
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::FrontFace::eClockwise;
}
}
inline Common::Vec4f ColorRGBA8(const u32 color) {
const auto rgba =
Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF};
return rgba / 255.0f;
}
inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
}
} // namespace PicaToVK
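These helpers are meant to be composed when translating PICA state; a minimal sketch of building a sampler from them (hypothetical: the config field names are assumed from Pica::TexturingRegs::TextureConfig, and VULKAN_HPP_NO_CONSTRUCTORS is assumed as in the commit's .cpp files):

// Hypothetical: compose the filter/wrap helpers into a vk::Sampler.
vk::Sampler MakeSamplerSketch(vk::Device device,
                              const Pica::TexturingRegs::TextureConfig& config) {
    const PicaToVK::FilterInfo filter = PicaToVK::TextureFilterMode(
        config.mag_filter, config.min_filter, config.mip_filter);
    const vk::SamplerCreateInfo sampler_info = {
        .magFilter = filter.mag_filter,
        .minFilter = filter.min_filter,
        .mipmapMode = filter.mip_mode,
        .addressModeU = PicaToVK::WrapMode(config.wrap_s),
        .addressModeV = PicaToVK::WrapMode(config.wrap_t)
    };
    return device.createSampler(sampler_info);
}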

View File

@@ -0,0 +1,9 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VMA_IMPLEMENTATION
#include "video_core/renderer_vulkan/vk_common.h"
// Store the dispatch loader here
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE

View File

@@ -0,0 +1,66 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
// Include vulkan-hpp header
#define VK_NO_PROTOTYPES 1
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#include <vulkan/vulkan.hpp>
// Include Vulkan memory allocator
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
#include <vk_mem_alloc.h>
namespace Vulkan {
/// Return the image aspect associated on the provided format
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32SfloatS8Uint:
return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
return vk::ImageAspectFlagBits::eDepth;
default:
return vk::ImageAspectFlagBits::eColor;
}
}
/// Returns a bit mask with the required usage of a format with a particular aspect
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::ImageUsageFlagBits::eColorAttachment;
}
}
/// Returns a bit mask with the required features of a format with a particular aspect
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::FormatFeatureFlagBits::eColorAttachment;
}
}
} // namespace Vulkan
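A short sketch of how these helpers chain together when setting up a depth-stencil target (illustrative only; IsFormatSupported is the Instance method added later in this commit):

// Hypothetical: derive the aspect, then the usage/features it implies.
vk::ImageUsageFlags DepthUsageSketch() {
    const vk::Format format = vk::Format::eD24UnormS8Uint;
    const vk::ImageAspectFlags aspect = Vulkan::GetImageAspect(format); // eDepth | eStencil
    // Vulkan::GetFormatFeatures(aspect) is what a runtime would pass to
    // Instance::IsFormatSupported before committing to this format.
    return Vulkan::GetImageUsage(aspect); // sampled/transfer + depth-stencil attachment
}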

View File

@@ -0,0 +1,292 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <span>
#include <array>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
Instance::Instance(Frontend::EmuWindow& window) {
auto window_info = window.GetWindowInfo();
// Fetch instance-independent function pointers
vk::DynamicLoader dl;
auto vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
// Enable the instance extensions the backend uses
auto extensions = GetInstanceExtensions(window_info.type, true);
// We require a Vulkan 1.1 driver
const u32 available_version = vk::enumerateInstanceVersion();
if (available_version < VK_API_VERSION_1_1) {
LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
}
const vk::ApplicationInfo application_info = {
.pApplicationName = "Citra",
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
.pEngineName = "Citra Vulkan",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version
};
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const vk::InstanceCreateInfo instance_info = {
.pApplicationInfo = &application_info,
.enabledLayerCount = static_cast<u32>(layers.size()),
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data()
};
// Create VkInstance
instance = vk::createInstance(instance_info);
VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);
surface = CreateSurface(instance, window);
// TODO: GPU select dialog
physical_device = instance.enumeratePhysicalDevices()[0];
// Cache the device properties; ValidateData and the getters below rely on them
device_properties = physical_device.getProperties();
device_limits = device_properties.limits;
// Create logical device
CreateDevice(true);
}
Instance::~Instance() {
device.waitIdle();
vmaDestroyAllocator(allocator);
device.destroy();
instance.destroySurfaceKHR(surface);
instance.destroy();
}
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
if (auto it = supported.find(format); it != supported.end()) {
return (it->second.optimalTilingFeatures & usage) == usage;
}
// Cache format properties so we don't have to query the driver all the time
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
supported.insert(std::make_pair(format, properties));
return (properties.optimalTilingFeatures & usage) == usage;
}
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
if (IsFormatSupported(format, features)) {
return format;
}
// Return the most supported alternative format preferably with the
// same block size according to the Vulkan spec.
// See 43.3. Required Format Support of the Vulkan spec
switch (format) {
case vk::Format::eD24UnormS8Uint:
return vk::Format::eD32SfloatS8Uint;
case vk::Format::eX8D24UnormPack32:
return vk::Format::eD32Sfloat;
case vk::Format::eR5G5B5A1UnormPack16:
return vk::Format::eA1R5G5B5UnormPack16;
case vk::Format::eR8G8B8Unorm:
return vk::Format::eR8G8B8A8Unorm;
case vk::Format::eUndefined:
return vk::Format::eUndefined;
case vk::Format::eR4G4B4A4UnormPack16:
// B4G4R4A4 is not guaranteed by the spec to support attachments
return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
default:
LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}",
vk::to_string(format), vk::to_string(features));
return vk::Format::eR8G8B8A8Unorm;
}
}
bool Instance::CreateDevice(bool validation_enabled) {
// Determine required extensions and features
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
vk::PhysicalDeviceDynamicRenderingFeaturesKHR,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
// Not having geometry shaders or wide lines will cause issues with rendering.
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
if (!available.geometryShader && !available.wideLines) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
}
// Enable some common features other emulators like Dolphin use
const vk::PhysicalDeviceFeatures2 features = {
.features = {
.robustBufferAccess = available.robustBufferAccess,
.geometryShader = available.geometryShader,
.sampleRateShading = available.sampleRateShading,
.dualSrcBlend = available.dualSrcBlend,
.logicOp = available.logicOp,
.depthClamp = available.depthClamp,
.largePoints = available.largePoints,
.samplerAnisotropy = available.samplerAnisotropy,
.occlusionQueryPrecise = available.occlusionQueryPrecise,
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
.shaderClipDistance = available.shaderClipDistance
}
};
// Enable newer Vulkan features
auto enabled_features = vk::StructureChain{
features,
//feature_chain.get<vk::PhysicalDeviceDynamicRenderingFeaturesKHR>(),
//feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
//feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
};
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
if (extension_list.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// List available device extensions
for (const auto& extension : extension_list) {
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName);
}
// Helper lambda for adding extensions
std::array<const char*, 6> enabled_extensions;
u32 enabled_extension_count = 0;
auto AddExtension = [&](std::string_view name, bool required) -> bool {
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
return name.compare(prop.extensionName.data()) == 0;
});
if (result != extension_list.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
enabled_extensions[enabled_extension_count++] = name.data();
return true;
}
if (required) {
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
}
return false;
};
// Add required extensions
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
// Check for optional features
//dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
//extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
//push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
// Search queue families for graphics and present queues
auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
return false;
}
graphics_queue_family_index = -1;
present_queue_family_index = -1;
for (u32 i = 0; i < static_cast<u32>(family_properties.size()); i++) {
// Check if queue supports graphics
if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
graphics_queue_family_index = i;
// If this queue also supports presentation we are finished
if (physical_device.getSurfaceSupportKHR(i, surface)) {
present_queue_family_index = i;
break;
}
}
// Check if queue supports presentation
if (physical_device.getSurfaceSupportKHR(i, surface)) {
present_queue_family_index = i;
}
}
if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
return false;
}
static constexpr float queue_priorities[] = {1.0f};
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const std::array queue_infos = {
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = graphics_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
},
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = present_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
}
};
vk::DeviceCreateInfo device_info = {
.pNext = &features, // TODO: Change this
.queueCreateInfoCount = 1,
.pQueueCreateInfos = queue_infos.data(),
.enabledExtensionCount = enabled_extension_count,
.ppEnabledExtensionNames = enabled_extensions.data(),
};
if (graphics_queue_family_index != present_queue_family_index) {
device_info.queueCreateInfoCount = 2;
}
// Enable debug layer on debug builds
if (validation_enabled) {
device_info.enabledLayerCount = static_cast<u32>(layers.size());
device_info.ppEnabledLayerNames = layers.data();
}
// Create logical device
device = physical_device.createDevice(device_info);
VULKAN_HPP_DEFAULT_DISPATCHER.init(device);
// Grab the graphics and present queues.
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
present_queue = device.getQueue(present_queue_family_index, 0);
// Create the VMA allocator
CreateAllocator();
return true;
}
void Instance::CreateAllocator() {
VmaVulkanFunctions functions = {
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
};
VmaAllocatorCreateInfo allocator_info = {
.physicalDevice = physical_device,
.device = device,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1
};
if (auto result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result);
UNREACHABLE();
}
}
} // namespace Vulkan
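Usage sketch (hypothetical): callers such as a texture runtime would funnel requested formats through GetFormatAlternative before creating images, so drivers without e.g. RGBA4 attachment support fall back transparently:

// Hypothetical fallback query inside a texture runtime.
vk::Format PickTextureFormatSketch(const Vulkan::Instance& instance) {
    // Returns eR4G4B4A4UnormPack16 if fully supported, otherwise
    // eB4G4R4A4UnormPack16 or eR8G8B8A8Unorm as a last resort.
    return instance.GetFormatAlternative(vk::Format::eR4G4B4A4UnormPack16);
}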

View File

@@ -0,0 +1,129 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
/// The global Vulkan instance
class Instance {
public:
Instance(Frontend::EmuWindow& window);
~Instance();
/// Returns true when the format supports the provided feature flags
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
/// Returns the most compatible format that supports the provided feature flags
vk::Format GetFormatAlternative(vk::Format format) const;
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return instance;
}
/// Returns the Vulkan surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the current physical device
vk::PhysicalDevice GetPhysicalDevice() const {
return physical_device;
}
/// Returns the Vulkan device
vk::Device GetDevice() const {
return device;
}
VmaAllocator GetAllocator() const {
return allocator;
}
/// Retrieve queue information
u32 GetGraphicsQueueFamilyIndex() const {
return graphics_queue_family_index;
}
u32 GetPresentQueueFamilyIndex() const {
return present_queue_family_index;
}
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Feature support
bool IsDynamicRenderingSupported() const {
return dynamic_rendering;
}
bool IsExtendedDynamicStateSupported() const {
// TODO: Enable this when the pipeline builder is confirmed functional
return false;
}
bool IsPushDescriptorsSupported() const {
return push_descriptors;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return device_properties.vendorID;
}
/// Returns the device ID of the physical device
u32 GetDeviceID() const {
return device_properties.deviceID;
}
/// Returns the pipeline cache unique identifier
const auto& GetPipelineCacheUUID() const {
return device_properties.pipelineCacheUUID;
}
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return device_limits.minUniformBufferOffsetAlignment;
}
private:
bool CreateDevice(bool validation_enabled);
void CreateAllocator();
private:
// Queue family indexes
u32 present_queue_family_index = 0;
u32 graphics_queue_family_index = 0;
vk::Queue present_queue, graphics_queue;
// Core vulkan objects
vk::Device device;
vk::PhysicalDevice physical_device;
vk::Instance instance;
vk::SurfaceKHR surface;
vk::PhysicalDeviceLimits device_limits;
vk::PhysicalDeviceProperties device_properties;
VmaAllocator allocator;
// Features per vulkan version
bool dynamic_rendering = false;
bool extended_dynamic_state = false;
bool push_descriptors = false;
};
} // namespace Vulkan

View File

@@ -0,0 +1,646 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
struct Bindings {
std::array<vk::DescriptorType, MAX_DESCRIPTORS> bindings;
u32 binding_count;
};
constexpr u32 RASTERIZER_SET_COUNT = 4;
constexpr static std::array RASTERIZER_SETS = {
Bindings{
// Utility set
.bindings = {
vk::DescriptorType::eUniformBuffer,
vk::DescriptorType::eUniformBuffer,
vk::DescriptorType::eUniformTexelBuffer,
vk::DescriptorType::eUniformTexelBuffer,
vk::DescriptorType::eUniformTexelBuffer
},
.binding_count = 5
},
Bindings{
// Texture set
.bindings = {
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage
},
.binding_count = 4
},
Bindings{
// Sampler set
.bindings = {
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler
},
.binding_count = 4
},
Bindings {
// Shadow set
.bindings = {
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage
},
.binding_count = 7
}
};
constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
vk::ShaderStageFlags flags;
switch (type) {
case vk::DescriptorType::eSampler:
case vk::DescriptorType::eSampledImage:
case vk::DescriptorType::eUniformTexelBuffer:
case vk::DescriptorType::eStorageImage:
flags = vk::ShaderStageFlagBits::eFragment;
break;
case vk::DescriptorType::eUniformBuffer:
case vk::DescriptorType::eUniformBufferDynamic:
flags = vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eCompute;
break;
default:
LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
}
return flags;
}
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
return sizeof(float) * attrib.size;
case AttribType::Int:
return sizeof(u32) * attrib.size;
case AttribType::Short:
return sizeof(u16) * attrib.size;
case AttribType::Byte:
case AttribType::Ubyte:
return sizeof(u8) * attrib.size;
}
}
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
switch (attrib.size) {
case 1: return vk::Format::eR32Sfloat;
case 2: return vk::Format::eR32G32Sfloat;
case 3: return vk::Format::eR32G32B32Sfloat;
case 4: return vk::Format::eR32G32B32A32Sfloat;
}
default:
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
UNREACHABLE();
}
return vk::Format::eR32Sfloat;
}
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
switch (index) {
case 0: return vk::ShaderStageFlagBits::eVertex;
case 1: return vk::ShaderStageFlagBits::eFragment;
case 2: return vk::ShaderStageFlagBits::eGeometry;
default:
LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
UNREACHABLE();
}
return vk::ShaderStageFlagBits::eVertex;
}
PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
descriptor_dirty.fill(true);
LoadDiskCache();
}
PipelineCache::~PipelineCache() {
vk::Device device = instance.GetDevice();
SaveDiskCache();
device.destroyPipelineLayout(layout);
for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
device.destroyDescriptorSetLayout(descriptor_set_layouts[i]);
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
for (const auto& [hash, pipeline] : graphics_pipelines) {
device.destroyPipeline(pipeline);
}
graphics_pipelines.clear();
}
void PipelineCache::BindPipeline(const PipelineInfo& info) {
ApplyDynamic(info);
u64 shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ?
offsetof(PipelineInfo, rasterization) :
offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
if (new_pipeline) {
it->second = BuildPipeline(info);
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
BindDescriptorSets();
}
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) {
const PicaVSConfig config{regs.vs, setup};
auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
if (!handle) {
return false;
}
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = config.Hash();
return true;
}
void PipelineCache::UseTrivialVertexShader() {
current_shaders[ProgramType::VS] = trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
}
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
const PicaFixedGSConfig gs_config{regs};
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
}
void PipelineCache::UseTrivialGeometryShader() {
current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
shader_hashes[ProgramType::GS] = 0;
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
}
void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_view) {
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.imageView = image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
}
};
SetBinding(set, descriptor, data);
}
void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 offset, u32 size) {
const DescriptorData data = {
.buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size
}
};
SetBinding(set, descriptor, data);
}
void PipelineCache::BindTexelBuffer(u32 set, u32 descriptor, vk::BufferView buffer_view) {
const DescriptorData data = {
.buffer_view = buffer_view
};
SetBinding(set, descriptor, data);
}
void PipelineCache::BindSampler(u32 set, u32 descriptor, vk::Sampler sampler) {
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.sampler = sampler
}
};
SetBinding(set, descriptor, data);
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f});
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}});
}
void PipelineCache::MarkDescriptorSetsDirty() {
descriptor_dirty.fill(true);
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_compare_mask);
command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_write_mask);
command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_reference);
if (instance.IsExtendedDynamicStateSupported()) {
command_buffer.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode));
command_buffer.setDepthCompareOpEXT(PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op));
command_buffer.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable);
command_buffer.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable);
command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode));
command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology));
command_buffer.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable);
command_buffer.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op));
}
}
void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) {
if (update_data[set][binding] != data) {
update_data[set][binding] = data;
descriptor_dirty[set] = true;
}
}
void PipelineCache::BuildLayout() {
std::array<vk::DescriptorSetLayoutBinding, MAX_DESCRIPTORS> set_bindings;
std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
const auto& set = RASTERIZER_SETS[i];
for (u32 j = 0; j < set.binding_count; j++) {
vk::DescriptorType type = set.bindings[j];
set_bindings[j] = vk::DescriptorSetLayoutBinding{
.binding = j,
.descriptorType = type,
.descriptorCount = 1,
.stageFlags = ToVkStageFlags(type)
};
update_entries[j] = vk::DescriptorUpdateTemplateEntry{
.dstBinding = j,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = type,
.offset = j * sizeof(DescriptorData),
.stride = 0
};
}
const vk::DescriptorSetLayoutCreateInfo layout_info = {
.bindingCount = set.binding_count,
.pBindings = set_bindings.data()
};
// Create descriptor set layout
descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info);
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = set.binding_count,
.pDescriptorUpdateEntries = update_entries.data(),
.descriptorSetLayout = descriptor_set_layouts[i]
};
// Create descriptor set update template
update_templates[i] = device.createDescriptorUpdateTemplate(template_info);
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = RASTERIZER_SET_COUNT,
.pSetLayouts = descriptor_set_layouts.data(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr
};
layout = device.createPipelineLayout(layout_info);
}
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
vk::Device device = instance.GetDevice();
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (std::size_t i = 0; i < current_shaders.size(); i++) {
vk::ShaderModule shader = current_shaders[i];
if (!shader) {
continue;
}
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(i),
.module = shader,
.pName = "main"
};
}
/**
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and increasing
* data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate.
* Since one instance is all we render, the shader will always read the single attribute.
*/
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
const auto& binding = info.vertex_layout.bindings[i];
bindings[i] = vk::VertexInputBindingDescription{
.binding = binding.binding,
.stride = binding.stride,
.inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
: vk::VertexInputRate::eVertex
};
}
// Populate vertex attribute structures
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
const auto& attr = info.vertex_layout.attributes[i];
attributes[i] = vk::VertexInputAttributeDescription{
.location = attr.location,
.binding = attr.binding,
.format = ToVkAttributeFormat(attr),
.offset = attr.offset
};
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
.pVertexAttributeDescriptions = attributes.data()
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
.primitiveRestartEnable = false
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
.frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
.depthBiasEnable = false,
.lineWidth = 1.0f
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = info.blending.blend_enable.Value(),
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = info.blending.logic_op_enable.Value(),
.logicOp = PicaToVK::LogicOp(info.blending.logic_op),
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
};
const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.viewportCount = 1,
.pViewports = &placeholder_viewport,
.scissorCount = 1,
.pScissors = &placeholder_scissor,
};
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
const std::array dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
vk::DynamicState::eLineWidth,
vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference,
// VK_EXT_extended_dynamic_state
vk::DynamicState::eCullModeEXT,
vk::DynamicState::eDepthCompareOpEXT,
vk::DynamicState::eDepthTestEnableEXT,
vk::DynamicState::eDepthWriteEnableEXT,
vk::DynamicState::eFrontFaceEXT,
vk::DynamicState::ePrimitiveTopologyEXT,
vk::DynamicState::eStencilOpEXT,
vk::DynamicState::eStencilTestEnableEXT,
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = extended_dynamic_states ? 14u : 6u,
.pDynamicStates = dynamic_states.data()
};
const vk::StencilOpState stencil_op_state = {
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
.depthBoundsTestEnable = false,
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
.front = stencil_op_state,
.back = stencil_op_state
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = layout,
.renderPass = renderpass_cache.GetRenderpass(info.color_attachment,
info.depth_attachment, false)
};
if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
result.result == vk::Result::eSuccess) {
return result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
UNREACHABLE();
}
return VK_NULL_HANDLE;
}
void PipelineCache::BindDescriptorSets() {
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
if (descriptor_dirty[i] || !descriptor_sets[i]) {
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_set_layouts[i]
};
vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i].data());
descriptor_sets[i] = set;
descriptor_dirty[i] = false;
}
}
// Bind the descriptor sets
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT,
descriptor_sets.data(), 0, nullptr);
}
void PipelineCache::LoadDiskCache() {
const std::string cache_path =
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin";
FileUtil::IOFile cache_file{cache_path, "r"};
if (!cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "No pipeline cache found");
}
const u32 cache_file_size = cache_file.GetSize();
auto cache_data = std::vector<u8>(cache_file_size);
if (!cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
LOG_WARNING(Render_Vulkan, "Error during pipeline cache read");
return;
}
cache_file.Close();
const bool is_valid = ValidateData(cache_data.data(), cache_file_size);
const vk::PipelineCacheCreateInfo cache_info = {
.initialDataSize = is_valid ? cache_file_size : 0,
.pInitialData = cache_data.data()
};
vk::Device device = instance.GetDevice();
pipeline_cache = device.createPipelineCache(cache_info);
}
void PipelineCache::SaveDiskCache() {
const std::string cache_path =
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin";
FileUtil::IOFile cache_file{cache_path, "w"};
if (!cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
return;
}
vk::Device device = instance.GetDevice();
auto cache_data = device.getPipelineCacheData(pipeline_cache);
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
return;
}
cache_file.Close();
}
bool PipelineCache::ValidateData(const u8* data, u32 size) {
if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
return false;
}
vk::PipelineCacheHeaderVersionOne header;
std::memcpy(&header, data, sizeof(header));
if (header.headerSize < sizeof(header)) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
return false;
}
if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
return false;
}
if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) {
LOG_ERROR(Render_Vulkan,
"Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
header.vendorID, vendor_id);
return false;
}
if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) {
LOG_ERROR(Render_Vulkan,
"Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
header.deviceID, device_id);
return false;
}
if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
return false;
}
return true;
}
} // namespace Vulkan
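For context, a hedged sketch of the per-draw flow that the still-missing rasterizer class would drive through PipelineCache (all names below exist in this commit except the sketch function itself):

// Hypothetical per-draw usage of PipelineCache.
void DrawSketch(Vulkan::PipelineCache& pipeline_cache, const Pica::Regs& regs,
                const Vulkan::PipelineInfo& info, vk::CommandBuffer command_buffer) {
    pipeline_cache.UseTrivialVertexShader(); // or UseProgrammableVertexShader(regs, setup)
    pipeline_cache.UseTrivialGeometryShader();
    pipeline_cache.UseFragmentShader(regs);
    pipeline_cache.SetViewport(0.0f, 0.0f, 400.0f, 240.0f);
    pipeline_cache.SetScissor(0, 0, 400, 240);
    pipeline_cache.BindPipeline(info); // hashes state, builds or reuses a pipeline,
                                       // and binds any dirty descriptor sets
    command_buffer.draw(3, 1, 0, 0);
}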

View File

@@ -0,0 +1,248 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/hash.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/shader/shader_cache.h"
#include "video_core/regs.h"
namespace Vulkan {
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
constexpr u32 MAX_VERTEX_BINDINGS = 16;
constexpr u32 MAX_DESCRIPTORS = 8;
constexpr u32 MAX_DESCRIPTOR_SETS = 6;
enum class AttribType : u32 {
Float = 0,
Int = 1,
Short = 2,
Byte = 3,
Ubyte = 4
};
/**
* The pipeline state is tightly packed with bitfields to reduce
* the overhead of hashing as much as possible
*/
union RasterizationState {
u8 value = 0;
BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
struct DepthStencilState {
union {
u32 value = 0;
BitField<0, 1, u32> depth_test_enable;
BitField<1, 1, u32> depth_write_enable;
BitField<2, 1, u32> stencil_test_enable;
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
};
// These are dynamic state so keep them separate
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
};
union BlendingState {
u32 value = 0;
BitField<0, 1, u32> blend_enable;
BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
BitField<23, 4, u32> color_write_mask;
BitField<27, 1, u32> logic_op_enable;
BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op;
};
union VertexBinding {
u16 value = 0;
BitField<0, 4, u16> binding;
BitField<4, 1, u16> fixed;
BitField<5, 11, u16> stride;
};
union VertexAttribute {
u32 value = 0;
BitField<0, 4, u32> binding;
BitField<4, 4, u32> location;
BitField<8, 3, AttribType> type;
BitField<11, 3, u32> size;
BitField<14, 11, u32> offset;
};
struct VertexLayout {
u8 binding_count;
u8 attribute_count;
std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
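// Editor's sketch (not in the original commit): how the packed layout expands
// into Vulkan vertex input state. ToVkAttributeFormat is a hypothetical helper
// that maps an AttribType/size pair to a vk::Format; <vector> is assumed.
inline void UnpackVertexLayout(const VertexLayout& layout,
                               std::vector<vk::VertexInputBindingDescription>& bindings,
                               std::vector<vk::VertexInputAttributeDescription>& attributes) {
    for (u32 i = 0; i < layout.binding_count; i++) {
        bindings.push_back({
            .binding = layout.bindings[i].binding,
            .stride = layout.bindings[i].stride,
            .inputRate = vk::VertexInputRate::eVertex
        });
    }
    for (u32 i = 0; i < layout.attribute_count; i++) {
        const VertexAttribute& attr = layout.attributes[i];
        attributes.push_back({
            .location = attr.location,
            .binding = attr.binding,
            .format = ToVkAttributeFormat(attr.type, attr.size), // hypothetical helper
            .offset = attr.offset
        });
    }
}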
/**
* Information about a graphics/compute pipeline
*/
struct PipelineInfo {
VertexLayout vertex_layout{};
BlendingState blending{};
VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8;
VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
RasterizationState rasterization{};
DepthStencilState depth_stencil{};
};
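// Editor's sketch (not in the original commit): since every field above is a
// packed integer, a pipeline lookup key can be built by folding the raw values
// with Common::HashCombine from common/hash.h. The dynamic stencil fields are
// deliberately left out of the key.
inline u64 ComputePipelineKey(const PipelineInfo& info) {
    std::size_t seed = info.rasterization.value;
    Common::HashCombine(seed, info.depth_stencil.value);
    Common::HashCombine(seed, info.blending.value);
    Common::HashCombine(seed, static_cast<u64>(info.color_attachment));
    Common::HashCombine(seed, static_cast<u64>(info.depth_attachment));
    for (const VertexBinding& binding : info.vertex_layout.bindings) {
        Common::HashCombine(seed, binding.value);
    }
    for (const VertexAttribute& attribute : info.vertex_layout.attributes) {
        Common::HashCombine(seed, attribute.value);
    }
    return seed;
}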
union DescriptorData {
vk::DescriptorImageInfo image_info;
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
bool operator!=(const DescriptorData& other) const {
return std::memcmp(this, &other, sizeof(DescriptorData)) != 0;
}
};
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;
/**
* Vulkan specialized PICA shader caches
*/
using ProgrammableVertexShaders =
Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, &Compile, &GenerateVertexShader>;
using FixedGeometryShaders =
Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule, &Compile, &GenerateFixedGeometryShader>;
using FragmentShaders =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
class Instance;
class TaskScheduler;
class RenderpassCache;
/**
* Stores a collection of rasterizer pipelines used during rendering.
* In addition, it handles descriptor set management.
*/
class PipelineCache {
public:
PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache);
~PipelineCache();
/// Binds a pipeline using the provided information
void BindPipeline(const PipelineInfo& info);
/// Binds a PICA decompiled vertex shader
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup);
/// Binds a passthrough vertex shader
void UseTrivialVertexShader();
/// Binds a PICA decompiled geometry shader
void UseFixedGeometryShader(const Pica::Regs& regs);
/// Binds a passthrough geometry shader
void UseTrivialGeometryShader();
/// Binds a fragment shader generated from PICA state
void UseFragmentShader(const Pica::Regs& regs);
/// Binds a texture to the specified descriptor
void BindTexture(u32 set, u32 binding, vk::ImageView view);
/// Binds a buffer to the specified descriptor
void BindBuffer(u32 set, u32 binding, vk::Buffer buffer, u32 offset, u32 size);
/// Binds a buffer to the specified descriptor
void BindTexelBuffer(u32 set, u32 binding, vk::BufferView buffer_view);
/// Binds a sampler to the specified descriptor
void BindSampler(u32 set, u32 binding, vk::Sampler sampler);
/// Sets the viewport rectangle to the provided values
void SetViewport(float x, float y, float width, float height);
/// Sets the scissor rectangle to the provided values
void SetScissor(s32 x, s32 y, u32 width, u32 height);
/// Marks all descriptor sets as dirty
void MarkDescriptorSetsDirty();
private:
/// Binds a resource to the provided binding
void SetBinding(u32 set, u32 binding, DescriptorData data);
/// Applies dynamic pipeline state to the current command buffer
void ApplyDynamic(const PipelineInfo& info);
/// Builds the rasterizer pipeline layout
void BuildLayout();
/// Builds a rasterizer pipeline using the PipelineInfo struct
vk::Pipeline BuildPipeline(const PipelineInfo& info);
/// Builds descriptor sets that reference the currently bound resources
void BindDescriptorSets();
/// Loads the pipeline cache stored to disk
void LoadDiskCache();
/// Stores the generated pipeline cache to disk
void SaveDiskCache();
/// Ensures the disk data was generated from the same driver
bool ValidateData(const u8* data, u32 size);
private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
// Cached pipelines
vk::PipelineCache pipeline_cache;
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
vk::Pipeline current_pipeline{};
// Cached layouts for the rasterizer pipelines
vk::PipelineLayout layout;
std::array<vk::DescriptorSetLayout, MAX_DESCRIPTOR_SETS> descriptor_set_layouts;
std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;
// Current data for the descriptor sets
std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
// Bound shader modules
enum ProgramType : u32 {
VS = 0,
GS = 2,
FS = 1
};
std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
ProgrammableVertexShaders programmable_vertex_shaders;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
vk::ShaderModule trivial_vertex_shader;
};
} // namespace Vulkan

View File

@ -0,0 +1,130 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
// Include the vulkan platform specific header
#if defined(ANDROID) || defined(__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR 1
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR 1
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK 1
#define VK_USE_PLATFORM_METAL_EXT 1
#else
#ifdef WAYLAND_DISPLAY
#define VK_USE_PLATFORM_WAYLAND_KHR 1
#else // wayland
#define VK_USE_PLATFORM_XLIB_KHR 1
#endif
#endif
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <vector>
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface;
#if VK_USE_PLATFORM_WIN32_KHR
if (window_info.type == Frontend::WindowSystemType::Windows) {
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
.hinstance = nullptr,
.hwnd = static_cast<HWND>(window_info.render_surface)
};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
}
}
#elif VK_USE_PLATFORM_XLIB_KHR
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_WAYLAND_KHR
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
}
return surface;
}
inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
const auto properties = vk::enumerateInstanceExtensionProperties();
if (properties.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return std::vector<const char*>{};
}
// Add the windowing system specific extension
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#if VK_USE_PLATFORM_WIN32_KHR
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_XLIB_KHR
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_WAYLAND_KHR
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
for (const char* extension : extensions) {
const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
return std::strcmp(extension, prop.extensionName) == 0;
});
if (iter == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return std::vector<const char*>{};
}
}
return extensions;
}
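// Editor's sketch (not in the original commit): typical instance bring-up with
// the two helpers above. The application name and API version are placeholders.
inline vk::Instance CreateVulkanInstance(const Frontend::EmuWindow& emu_window,
                                         bool enable_debug_utils) {
    const auto window_info = emu_window.GetWindowInfo();
    const auto extensions = GetInstanceExtensions(window_info.type, enable_debug_utils);
    const vk::ApplicationInfo application_info = {
        .pApplicationName = "Citra",
        .apiVersion = VK_API_VERSION_1_1
    };
    const vk::InstanceCreateInfo instance_info = {
        .pApplicationInfo = &application_info,
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data()
    };
    // The returned instance is what CreateSurface above consumes.
    return vk::createInstance(instance_info);
}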
} // namespace Vulkan

View File

@ -0,0 +1,173 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
vk::Format ToVkFormatColor(u32 index) {
switch (index) {
case 1: return vk::Format::eR8G8B8A8Unorm;
case 2: return vk::Format::eR8G8B8Unorm;
case 3: return vk::Format::eR5G5B5A1UnormPack16;
case 4: return vk::Format::eR5G6B5UnormPack16;
case 5: return vk::Format::eR4G4B4A4UnormPack16;
default: return vk::Format::eUndefined;
}
}
vk::Format ToVkFormatDepth(u32 index) {
switch (index) {
case 1: return vk::Format::eD16Unorm;
case 2: return vk::Format::eX8D24UnormPack32;
case 3: return vk::Format::eD24UnormS8Uint;
default: return vk::Format::eUndefined;
}
}
RenderpassCache::RenderpassCache(const Instance& instance) : instance{instance} {
// Pre-create all renderpasses needed by the renderer
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
if (color == 0 && depth == 0) {
continue;
}
const vk::Format color_format =
color == 0 ? vk::Format::eUndefined : instance.GetFormatAlternative(ToVkFormatColor(color));
const vk::Format depth_stencil_format =
depth == 0 ? vk::Format::eUndefined : instance.GetFormatAlternative(ToVkFormatDepth(depth));
cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format,
vk::AttachmentLoadOp::eLoad,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);
cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format,
vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);
}
}
}
RenderpassCache::~RenderpassCache() {
vk::Device device = instance.GetDevice();
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
if (color == 0 && depth == 0) {
continue;
}
auto& load_pass = cached_renderpasses[color][depth][0];
auto& clear_pass = cached_renderpasses[color][depth][1];
// Destroy renderpasses
device.destroyRenderPass(load_pass);
device.destroyRenderPass(clear_pass);
}
}
device.destroyRenderPass(present_renderpass);
}
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
if (!present_renderpass) {
present_renderpass = CreateRenderPass(format, vk::Format::eUndefined,
vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eUndefined,
vk::ImageLayout::ePresentSrcKHR);
}
}
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear) const {
const u32 color_index =
color == VideoCore::PixelFormat::Invalid ? 0 : static_cast<u32>(color);
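// Depth formats begin at index 14 in VideoCore::PixelFormat, so subtract 13 to map D16 to slot 1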
const u32 depth_index =
depth == VideoCore::PixelFormat::Invalid ? 0 : (static_cast<u32>(depth) - 13);
ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS);
return cached_renderpasses[color_index][depth_index][is_clear];
}
vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const {
// Define attachments
u32 attachment_count = 0;
std::array<vk::AttachmentDescription, 2> attachments;
bool use_color = false;
vk::AttachmentReference color_attachment_ref{};
bool use_depth = false;
vk::AttachmentReference depth_attachment_ref{};
if (color != vk::Format::eUndefined) {
attachments[attachment_count] = vk::AttachmentDescription{
.format = color,
.loadOp = load_op,
.storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
.stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
.initialLayout = initial_layout,
.finalLayout = final_layout
};
color_attachment_ref = vk::AttachmentReference{
.attachment = attachment_count++,
.layout = vk::ImageLayout::eColorAttachmentOptimal
};
use_color = true;
}
if (depth != vk::Format::eUndefined) {
attachments[attachment_count] = vk::AttachmentDescription{
.format = depth,
.loadOp = load_op,
.storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
.stencilStoreOp = vk::AttachmentStoreOp::eStore,
.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal,
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
depth_attachment_ref = vk::AttachmentReference{
.attachment = attachment_count++,
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
use_depth = true;
}
// Only a single subpass is required
const vk::SubpassDescription subpass = {
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = use_color ? 1u : 0u,
.pColorAttachments = &color_attachment_ref,
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr
};
const vk::RenderPassCreateInfo renderpass_info = {
.attachmentCount = attachment_count,
.pAttachments = attachments.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 0,
.pDependencies = nullptr
};
// Create the renderpass
vk::Device device = instance.GetDevice();
return device.createRenderPass(renderpass_info);
}
} // namespace Vulkan

View File

@ -0,0 +1,46 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Swapchain;
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 3;
class RenderpassCache {
public:
RenderpassCache(const Instance& instance);
~RenderpassCache();
/// Creates the renderpass used when rendering to the swapchain
void CreatePresentRenderpass(vk::Format format);
/// Returns the renderpass associated with the color-depth format pair
vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear) const;
/// Returns the swapchain clear renderpass
vk::RenderPass GetPresentRenderpass() const {
return present_renderpass;
}
private:
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const;
private:
const Instance& instance;
vk::RenderPass present_renderpass{};
vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2];
};
} // namespace Vulkan

View File

@ -0,0 +1,223 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
namespace Vulkan {
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}
};
EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
switch (stage) {
case vk::ShaderStageFlagBits::eVertex:
return EShLanguage::EShLangVertex;
case vk::ShaderStageFlagBits::eGeometry:
return EShLanguage::EShLangGeometry;
case vk::ShaderStageFlagBits::eFragment:
return EShLanguage::EShLangFragment;
case vk::ShaderStageFlagBits::eCompute:
return EShLanguage::EShLangCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Unkown shader stage");
UNREACHABLE();
}
return EShLanguage::EShLangVertex;
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
}
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
ShaderOptimization level) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
}
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
EShLanguage lang = ToEshShaderStage(stage);
int default_version = 450;
const char* pass_source_code = code.data();
int pass_source_code_length = static_cast<int>(code.size());
auto shader = std::make_unique<glslang::TShader>(lang);
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
glslang::TShader::ForbidIncluder includer;
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return VK_NULL_HANDLE;
}
// Even though there's only a single shader, we still need to link it to generate SPV
auto program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return VK_NULL_HANDLE;
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (level == ShaderOptimization::Debug) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
} else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = out_code.size() * sizeof(u32),
.pCode = out_code.data()
};
return device.createShaderModule(shader_info);
}
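// Editor's sketch (not in the original commit): compiling a trivial GLSL 450
// fragment shader through the path above. The shader source is illustrative.
inline vk::ShaderModule CompileTrivialFragment(vk::Device device) {
    constexpr std::string_view source = R"(
#version 450
layout(location = 0) out vec4 color;
void main() {
    color = vec4(1.0);
}
)";
    return Compile(source, vk::ShaderStageFlagBits::eFragment, device,
                   ShaderOptimization::Debug);
}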
} // namespace Vulkan

View File

@ -0,0 +1,19 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
enum class ShaderOptimization {
High = 0,
Debug = 1
};
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage,
vk::Device device, ShaderOptimization level);
} // namespace Vulkan

File diff suppressed because it is too large

View File

@ -0,0 +1,247 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include <optional>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace Vulkan {
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
struct PicaFSConfigState {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
* programs. The functions in vk_shader_gen.cpp should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
};
/**
* This struct contains common information to identify a Vulkan vertex/geometry shader generated
* from a PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
};
/**
* This struct contains information to identify a Vulkan vertex shader generated from a PICA
* vertex shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;
}
};
struct PicaGSConfigCommonRaw {
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
* This struct contains information to identify a Vulkan geometry shader generated for the PICA
* no-geometry-shader pipeline.
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
state.Init(regs);
}
};
/**
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
* and directly passes them to the fragment shader.
* @returns String of the shader source code
*/
std::string GenerateTrivialVertexShader();
/**
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code; std::nullopt on failure
*/
std::optional<std::string> GenerateVertexShader(
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config);
/**
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
* @returns String of the shader source code
*/
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config);
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaFSConfig& config);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaVSConfig> {
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaFixedGSConfig> {
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
return k.Hash();
}
};
} // namespace std
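// Editor's sketch (not in the original commit): with the std::hash
// specializations above, the configs can key a map directly. This assumes
// Common::HashableStruct also provides equality over the raw state bytes.
//
//     std::unordered_map<Vulkan::PicaFSConfig, vk::ShaderModule> fragment_modules;
//     const auto config = Vulkan::PicaFSConfig::BuildFromRegs(regs);
//     fragment_modules.emplace(config, module);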

View File

@ -0,0 +1,241 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <algorithm>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
vk::PipelineStageFlagBits::eVertexInput);
break;
case vk::BufferUsageFlagBits::eIndexBuffer:
result = std::make_pair(vk::AccessFlagBits::eIndexRead,
vk::PipelineStageFlagBits::eVertexInput);
break;
case vk::BufferUsageFlagBits::eUniformBuffer:
result = std::make_pair(vk::AccessFlagBits::eUniformRead,
vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eGeometryShader |
vk::PipelineStageFlagBits::eFragmentShader);
break;
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
result = std::make_pair(vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eFragmentShader);
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
}
return result;
}
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
: instance{instance} {
const vk::BufferCreateInfo buffer_info = {
.size = size,
.usage = usage
};
const VmaAllocationCreateInfo alloc_create_info = {
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
}
StagingBuffer::~StagingBuffer() {
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info)
: instance{instance}, scheduler{scheduler}, info{info},
staging{instance, info.size, vk::BufferUsageFlagBits::eTransferSrc} {
const vk::BufferCreateInfo buffer_info = {
.size = info.size,
.usage = info.usage | vk::BufferUsageFlagBits::eTransferDst
};
const VmaAllocationCreateInfo alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < info.views.size(); i++) {
if (info.views[i] == vk::Format::eUndefined) {
break;
}
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer,
.format = info.views[i],
.range = info.size
};
views[i] = device.createBufferView(view_info);
// Track each created view so the count is correct even when every slot is used
view_count = i + 1;
}
available_size = info.size;
}
StreamBuffer::~StreamBuffer() {
if (buffer) {
vk::Device device = instance.GetDevice();
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
for (u32 i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
}
}
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= info.size && alignment <= info.size);
if (alignment > 0) {
buffer_offset = Common::AlignUp(buffer_offset, alignment);
}
// Have we run out of available space?
bool invalidate = false;
if (available_size < size) {
// Flush any pending writes before continuing
Flush();
// If we are at the end of the buffer, start over
if (buffer_offset + size > info.size) {
Invalidate();
invalidate = true;
}
// Try to garbage collect old regions
if (!UnlockFreeRegions(size)) {
// Nuclear option: stall the GPU to remove all the locks
LOG_WARNING(Render_Vulkan, "Buffer GPU stall");
Invalidate();
regions.clear();
available_size = info.size;
}
}
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
return std::make_tuple(mapped, buffer_offset, invalidate);
}
void StreamBuffer::Commit(u32 size) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
auto [access_mask, stage_mask] = ToVkAccessStageFlags(info.usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = buffer,
.offset = buffer_offset,
.size = size
};
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
buffer_offset += size;
available_size -= size;
}
void StreamBuffer::Flush() {
const u32 flush_size = buffer_offset - flush_start;
if (flush_size > 0) {
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
VmaAllocator allocator = instance.GetAllocator();
const vk::BufferCopy copy_region = {
.srcOffset = flush_start,
.dstOffset = flush_start,
.size = flush_size
};
vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
// Lock the region
const LockedRegion region = {
.size = flush_size,
.fence_counter = scheduler.GetFenceCounter()
};
regions.emplace(flush_start, region);
flush_start = buffer_offset;
}
}
void StreamBuffer::Invalidate() {
buffer_offset = 0;
flush_start = 0;
}
bool StreamBuffer::UnlockFreeRegions(u32 target_size) {
available_size = 0;
// Free regions that don't need waiting
auto it = regions.lower_bound(buffer_offset);
while (it != regions.end()) {
const auto& [offset, region] = *it;
if (region.fence_counter <= scheduler.GetFenceCounter()) {
available_size += region.size;
it = regions.erase(it);
}
else {
break;
}
}
// If that wasn't enough, try waiting for some fences
while (available_size < target_size && it != regions.end()) {
const auto& [offset, region] = *it;
if (region.fence_counter > scheduler.GetFenceCounter()) {
scheduler.WaitFence(region.fence_counter);
}
available_size += region.size;
it = regions.erase(it);
}
return available_size >= target_size;
}
} // namespace Vulkan

View File

@ -0,0 +1,87 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class TaskScheduler;
constexpr u32 MAX_BUFFER_VIEWS = 3;
struct BufferInfo {
u32 size = 0;
vk::BufferUsageFlagBits usage{};
std::array<vk::Format, MAX_BUFFER_VIEWS> views{};
};
struct LockedRegion {
u32 size = 0;
u64 fence_counter = 0;
};
struct StagingBuffer {
StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
~StagingBuffer();
const Instance& instance;
vk::Buffer buffer{};
VmaAllocation allocation{};
std::span<std::byte> mapped{};
};
class StreamBuffer {
public:
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info);
~StreamBuffer();
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
/// Commits size bytes from the currently mapped staging memory
void Commit(u32 size = 0);
/// Flushes staging memory to the GPU buffer
void Flush();
/// Returns the Vulkan buffer handle
vk::Buffer GetHandle() const {
return buffer;
}
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
}
private:
/// Invalidates the buffer offsets
void Invalidate();
/// Removes the lock on regions whose fence counter has been reached by the GPU
bool UnlockFreeRegions(u32 target_size);
private:
const Instance& instance;
TaskScheduler& scheduler;
BufferInfo info{};
StagingBuffer staging;
vk::Buffer buffer{};
VmaAllocation allocation{};
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
u32 view_count = 0;
u32 buffer_offset = 0;
u32 flush_start = 0;
s32 available_size = 0;
std::map<u32, LockedRegion> regions;
};
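// Editor's sketch (not in the original commit): the intended Map/Commit
// contract. Map returns a write pointer, the offset to bind at, and whether
// previously returned offsets were invalidated.
//
//     auto [ptr, offset, invalidated] = vertex_buffer.Map(data_size, /*alignment=*/4);
//     std::memcpy(ptr, data, data_size);
//     vertex_buffer.Commit(data_size);
//     command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(),
//                                      static_cast<vk::DeviceSize>(offset));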
} // namespace Vulkan

View File

@ -0,0 +1,237 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <algorithm>
#include <limits>
#include <utility>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
Swapchain::Swapchain(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache, vk::SurfaceKHR surface)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, surface{surface} {
// Set the surface format early for RenderpassCache to create the present renderpass
Configure(0, 0);
renderpass_cache.CreatePresentRenderpass(surface_format.format);
}
Swapchain::~Swapchain() {
vk::Device device = instance.GetDevice();
device.destroySemaphore(render_finished);
device.destroySemaphore(image_available);
device.destroySwapchainKHR(swapchain);
}
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
is_outdated = false;
is_suboptimal = false;
// Fetch information about the provided surface
Configure(width, height);
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
instance.GetPresentQueueFamilyIndex(),
};
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 1u : 2u;
const vk::SharingMode sharing_mode =
exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
const vk::SwapchainCreateInfoKHR swapchain_info = {
.surface = surface,
.minImageCount = image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
.imageSharingMode = sharing_mode,
.queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.presentMode = present_mode,
.clipped = true,
.oldSwapchain = swapchain
};
vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
// If an old swapchain exists, destroy it and move the new one to its place.
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
device.destroySwapchainKHR(old_swapchain);
}
// Create sync objects if not already created
if (!image_available) {
image_available = device.createSemaphore({});
}
if (!render_finished) {
render_finished = device.createSemaphore({});
}
vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
auto images = device.getSwapchainImagesKHR(swapchain);
// Destroy the previous image views and framebuffers
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}
// Resize first: writing through begin() of an empty vector is undefined behavior
swapchain_images.resize(images.size());
std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image {
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = vk::ImageViewType::e2D,
.format = surface_format.format,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
}
};
vk::ImageView image_view = device.createImageView(view_info);
const std::array attachments{image_view};
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = present_renderpass,
.attachmentCount = 1,
.pAttachments = attachments.data(),
.width = extent.width,
.height = extent.height,
.layers = 1
};
vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info);
return Image{
.image = image,
.image_view = image_view,
.framebuffer = framebuffer
};
});
}
// Wait for a maximum of 1 second (the timeout is in nanoseconds)
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage() {
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE,
&current_image);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
is_suboptimal = true;
break;
case vk::Result::eErrorOutOfDateKHR:
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
break;
}
}
void Swapchain::Present() {
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &render_finished,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image
};
vk::Queue present_queue = instance.GetPresentQueue();
vk::Result result = present_queue.presentKHR(present_info);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case vk::Result::eErrorOutOfDateKHR:
is_outdated = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
break;
}
current_frame = (current_frame + 1) % swapchain_images.size();
}
void Swapchain::Configure(u32 width, u32 height) {
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
// Choose surface format
auto formats = physical.getSurfaceFormatsKHR(surface);
surface_format = formats[0];
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
surface_format.format = vk::Format::eB8G8R8A8Unorm;
} else {
auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool {
return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm;
});
if (it == formats.end()) {
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
} else {
surface_format = *it;
}
}
// Choose the present mode from those the surface supports
auto modes = physical.getSurfacePresentModesKHR(surface);
// FIFO is guaranteed by the Vulkan standard to be available
present_mode = vk::PresentModeKHR::eFifo;
auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) {
return vk::PresentModeKHR::eMailbox == mode;
});
// Prefer Mailbox if present for lowest latency
if (iter != modes.end()) {
present_mode = vk::PresentModeKHR::eMailbox;
}
// Query surface extent
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
extent.width = std::clamp(width, capabilities.minImageExtent.width,
capabilities.maxImageExtent.width);
extent.height = std::clamp(height, capabilities.minImageExtent.height,
capabilities.maxImageExtent.height);
}
// Select the swapchain image count; request one extra image so a new frame can be prepared while another presents
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
}
} // namespace Vulkan

View File

@ -0,0 +1,101 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class TaskScheduler;
class RenderpassCache;
class Swapchain {
public:
Swapchain(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache, vk::SurfaceKHR surface);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquires the next image in the swapchain.
void AcquireNextImage();
/// Presents the current image and moves to the next one
void Present();
/// Returns the current swapchain extent
vk::Extent2D GetExtent() const {
return extent;
}
/// Returns the swapchain surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the swapchain format
vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
/// Returns the Vulkan swapchain handle
vk::SwapchainKHR GetHandle() const {
return swapchain;
}
/// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
vk::Semaphore GetAvailableSemaphore() const {
return image_available;
}
/// Returns the semaphore that presentation waits on before displaying the image
vk::Semaphore GetPresentSemaphore() const {
return render_finished;
}
/// Returns true when the swapchain should be recreated
bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
}
private:
void Configure(u32 width, u32 height);
private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
vk::SwapchainKHR swapchain{};
vk::SurfaceKHR surface{};
// Swapchain properties
vk::SurfaceFormatKHR surface_format;
vk::PresentModeKHR present_mode;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
u32 image_count;
struct Image {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
};
// Swapchain state
std::vector<Image> swapchain_images;
vk::Semaphore image_available{};
vk::Semaphore render_finished{};
u32 current_image = 0;
u32 current_frame = 0;
bool vsync_enabled = false;
bool is_outdated = true;
bool is_suboptimal = true;
};
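// Editor's sketch (not in the original commit): expected per-frame usage.
//
//     if (swapchain.NeedsRecreation()) {
//         swapchain.Create(width, height, /*vsync_enabled=*/true);
//     }
//     swapchain.AcquireNextImage();
//     // ... submit rendering that waits on GetAvailableSemaphore()
//     //     and signals GetPresentSemaphore() ...
//     swapchain.Present();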
} // namespace Vulkan

View File

@ -0,0 +1,178 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo command_pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
};
command_pool = device.createCommandPool(command_pool_info);
constexpr std::array pool_sizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024}
};
const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
.maxSets = 2048,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data()
};
const vk::CommandBufferAllocateInfo buffer_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
};
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = ExecutionSlot{
.fence = device.createFence({}),
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
.render_command_buffer = command_buffers[2 * i],
.upload_command_buffer = command_buffers[2 * i + 1],
};
}
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Begin first command
auto& command = commands[current_command];
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
}
TaskScheduler::~TaskScheduler() {
// Submit any remaining work
Submit(true, false);
vk::Device device = instance.GetDevice();
for (const auto& command : commands) {
device.destroyFence(command.fence);
device.destroyDescriptorPool(command.descriptor_pool);
}
device.destroyCommandPool(command_pool);
}
void TaskScheduler::Synchronize(u32 slot) {
const auto& command = commands[slot];
vk::Device device = instance.GetDevice();
if (command.fence_counter > completed_fence_counter) {
if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fences failed!");
}
completed_fence_counter = command.fence_counter;
}
device.resetFences(command.fence);
device.resetDescriptorPool(command.descriptor_pool);
}
void TaskScheduler::WaitFence(u64 counter) {
for (u32 i = 0; i < SCHEDULER_COMMAND_COUNT; i++) {
if (commands[i].fence_counter == counter) {
return Synchronize(i);
}
}
UNREACHABLE_MSG("Invalid fence counter!");
}
void TaskScheduler::Submit(bool wait_completion, bool begin_next,
vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
const auto& command = commands[current_command];
command.render_command_buffer.end();
if (command.use_upload_buffer) {
command.upload_command_buffer.end();
}
u32 command_buffer_count = 0;
std::array<vk::CommandBuffer, 2> command_buffers;
if (command.use_upload_buffer) {
command_buffers[command_buffer_count++] = command.upload_command_buffer;
}
command_buffers[command_buffer_count++] = command.render_command_buffer;
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
const vk::PipelineStageFlags wait_stage_masks =
vk::PipelineStageFlagBits::eColorAttachmentOutput;
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = &wait_stage_masks,
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &signal_semaphore,
};
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info, command.fence);
// Block host until the GPU catches up
if (wait_completion) {
Synchronize(current_command);
}
// Switch to next cmdbuffer.
if (begin_next) {
SwitchSlot();
}
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& command = commands[current_command];
if (!command.use_upload_buffer) {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
command.upload_command_buffer.begin(begin_info);
command.use_upload_buffer = true;
}
return command.upload_command_buffer;
}
void TaskScheduler::SwitchSlot() {
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
auto& command = commands[current_command];
// Wait for the GPU to finish with all resources for this command.
Synchronize(current_command);
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Begin the next command buffer.
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
command.use_upload_buffer = false;
}
} // namespace Vulkan

View File

@ -0,0 +1,82 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <array>
#include <functional>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
class Buffer;
class Instance;
class TaskScheduler {
public:
TaskScheduler(const Instance& instance);
~TaskScheduler();
/// Blocks the host until the current command completes execution
void Synchronize(u32 slot);
/// Waits for the fence counter to be reached by the GPU
void WaitFence(u64 counter);
/// Submits the current command to the graphics queue
void Submit(bool wait_completion = false, bool begin_next = true,
vk::Semaphore wait = VK_NULL_HANDLE,
vk::Semaphore signal = VK_NULL_HANDLE);
/// Returns the command buffer used for early upload operations.
vk::CommandBuffer GetUploadCommandBuffer();
/// Returns the command buffer used for rendering
vk::CommandBuffer GetRenderCommandBuffer() const {
return commands[current_command].render_command_buffer;
}
/// Returns the current descriptor pool
vk::DescriptorPool GetDescriptorPool() const {
return commands[current_command].descriptor_pool;
}
/// Returns the index of the current command slot
u32 GetCurrentSlotIndex() const {
return current_command;
}
/// Returns the last completed fence counter
u64 GetFenceCounter() const {
return completed_fence_counter;
}
private:
/// Activates the next command slot and optionally waits for its completion
void SwitchSlot();
private:
const Instance& instance;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;
struct ExecutionSlot {
bool use_upload_buffer = false;
u64 fence_counter = 0;
vk::Fence fence{};
vk::DescriptorPool descriptor_pool;
vk::CommandBuffer render_command_buffer{};
vk::CommandBuffer upload_command_buffer{};
};
vk::CommandPool command_pool{};
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands;
u32 current_command = 0;
};
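// Editor's sketch (not in the original commit): how a frame submission ties the
// scheduler to the swapchain semaphores.
//
//     vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();
//     // ... record rendering into cmdbuf ...
//     scheduler.Submit(/*wait_completion=*/false, /*begin_next=*/true,
//                      swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore());
//     swapchain.Present();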
} // namespace Vulkan

View File

@ -0,0 +1,562 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
switch (format) {
case VideoCore::PixelFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case VideoCore::PixelFormat::RGB8:
return vk::Format::eR8G8B8Unorm;
case VideoCore::PixelFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case VideoCore::PixelFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case VideoCore::PixelFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case VideoCore::PixelFormat::D16:
return vk::Format::eD16Unorm;
case VideoCore::PixelFormat::D24:
return vk::Format::eX8D24UnormPack32;
case VideoCore::PixelFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
case VideoCore::PixelFormat::Invalid:
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
default:
// All remaining texture formats are uploaded as RGBA8
return vk::Format::eR8G8B8A8Unorm;
}
}
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture:
case VideoCore::SurfaceType::Fill:
return vk::ImageAspectFlagBits::eColor;
case VideoCore::SurfaceType::Depth:
return vk::ImageAspectFlagBits::eDepth;
case VideoCore::SurfaceType::DepthStencil:
return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
default:
UNREACHABLE_MSG("Invalid surface type!");
}
return vk::ImageAspectFlagBits::eColor;
}
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
for (auto& buffer : staging_buffers) {
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst);
}
}
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const u32 current_slot = scheduler.GetCurrentSlotIndex();
const u32 offset = staging_offsets[current_slot];
if (offset + size > STAGING_BUFFER_SIZE) {
LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
UNREACHABLE();
}
// Advance the linear allocation offset so later requests in this slot don't overlap
staging_offsets[current_slot] += size;
const auto& buffer = staging_buffers[current_slot];
return StagingData{
.buffer = buffer->buffer,
.mapped = buffer->mapped.subspan(offset, size),
.buffer_offset = offset
};
}
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
staging_offsets[new_slot] = 0;
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const VideoCore::HostTextureTag key = {
.format = format,
.width = width,
.height = height,
.layers = layers
};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
// Create a new allocation
vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
vk::ImageAspectFlags aspect = GetImageAspect(vk_format);
const vk::ImageCreateInfo image_info = {
.flags = type == VideoCore::TextureType::CubeMap ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = vk::ImageType::e2D,
.format = vk_format,
.extent = {width, height, 1},
.mipLevels = std::bit_width(std::max(width, height)),
.arrayLayers = layers,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
};
const VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocation allocation;
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &allocation, nullptr);
if (result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
UNREACHABLE();
}
vk::Image image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = type == VideoCore::TextureType::CubeMap ?
vk::ImageViewType::eCube :
vk::ImageViewType::e2D,
.format = vk_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
}
};
vk::Device device = instance.GetDevice();
vk::ImageView image_view = device.createImageView(view_info);
return ImageAlloc{
.image = image,
.image_view = image_view,
.allocation = allocation,
};
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);
// For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
if (clear.texture_rect == surface.GetScaledRect()) {
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
if (aspect & vk::ImageAspectFlagBits::eColor) {
const vk::ClearColorValue clear_color = {
.float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})
};
const vk::ImageSubresourceRange range = {
.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
};
command_buffer.clearColorImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
clear_color, range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
const vk::ClearDepthStencilValue clear_depth = {
.depth = value.depth,
.stencil = value.stencil
};
const vk::ImageSubresourceRange range = {
.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
};
command_buffer.clearDepthStencilImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
clear_depth, range);
}
} else {
LOG_WARNING(Render_Vulkan, "Partial clears are unimplemented!");
}
return true;
}
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
const vk::ImageCopy image_copy = {
.srcSubresource = {
.aspectMask = ToVkAspect(source.type),
.mipLevel = copy.src_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
.dstSubresource = {
.aspectMask = ToVkAspect(dest.type),
.mipLevel = copy.dst_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
.extent = {copy.extent.width, copy.extent.height, 1}
};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
command_buffer.copyImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
dest.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
return true;
}
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
const std::array source_offsets = {
vk::Offset3D{static_cast<s32>(blit.src_rect.left), static_cast<s32>(blit.src_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.src_rect.right), static_cast<s32>(blit.src_rect.top), 1}
};
const std::array dest_offsets = {
vk::Offset3D{static_cast<s32>(blit.dst_rect.left), static_cast<s32>(blit.dst_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.dst_rect.right), static_cast<s32>(blit.dst_rect.top), 1}
};
const vk::ImageBlit blit_area = {
.srcSubresource = {
.aspectMask = ToVkAspect(source.type),
.mipLevel = blit.src_level,
.baseArrayLayer = blit.src_layer,
.layerCount = 1
},
.srcOffsets = source_offsets,
.dstSubresource = {
.aspectMask = ToVkAspect(dest.type),
.mipLevel = blit.dst_level,
.baseArrayLayer = blit.dst_layer,
.layerCount = 1
},
.dstOffsets = dest_offsets
};
command_buffer.blitImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
dest.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
return true;
}
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
s32 current_height = surface.GetScaledHeight();
const u32 levels = std::bit_width(std::max(surface.width, surface.height));
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {
vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}
};
const std::array dest_offsets = {
vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
current_height > 1 ? current_height / 2 : 1, 1}
};
const vk::ImageBlit blit_area = {
.srcSubresource = {
.aspectMask = aspect,
.mipLevel = i - 1,
.baseArrayLayer = 0,
.layerCount = 1
},
.srcOffsets = source_offsets,
.dstSubresource = {
.aspectMask = aspect,
.mipLevel = i,
.baseArrayLayer = 0,
.layerCount = 1
},
.dstOffsets = dest_offsets
};
command_buffer.blitImage(surface.image, vk::ImageLayout::eTransferSrcOptimal,
surface.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
}
}
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()} {
const ImageAlloc alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(),
params.pixel_format, texture_type);
allocation = alloc.allocation;
image_view = alloc.image_view;
image = alloc.image;
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload);
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledUpload(upload);
} else {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect;
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = upload.texture_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
};
TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
command_buffer.copyBufferToImage(staging.buffer, image,
vk::ImageLayout::eTransferDstOptimal,
copy_region);
}
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Download);
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledDownload(download);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = download.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = download.texture_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
};
if (aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += staging.mapped.size();
// Buffer copy regions may reference only a single image aspect
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
// Copy pixel data to the staging buffer
command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data());
scheduler.Submit(true);
}
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
// TODO: Implement with Vulkan image blits; the commented block below is the
// OpenGL reference implementation kept for porting.
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
}
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
// TODO: Implement with Vulkan image blits; the commented block below is the
// OpenGL reference implementation kept for porting.
/*const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);
// If filtering fails, resort to normal blitting
glBlitFramebuffer(0, 0, rect_width, rect_height,
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.right, upload.texture_rect.top,
MakeBufferMask(type), GL_LINEAR);
}*/
}
void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count) {
if (new_layout == layout) {
return;
}
struct LayoutInfo {
vk::AccessFlags access;
vk::PipelineStageFlags stage;
};
// Get optimal transition settings for every image layout. Settings taken from Dolphin
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info;
switch (layout) {
case vk::ImageLayout::eUndefined:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
break;
case vk::ImageLayout::ePreinitialized:
// Image has been pre-initialized by the host, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eHostWrite;
info.stage = vk::PipelineStageFlagBits::eHost;
break;
case vk::ImageLayout::eColorAttachmentOptimal:
// Image was being used as a color attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
break;
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
// Image was being used as a depth-stencil attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
break;
case vk::ImageLayout::ePresentSrcKHR:
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
break;
case vk::ImageLayout::eShaderReadOnlyOptimal:
// Image was being used as a shader resource, make sure all reads have finished.
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
break;
case vk::ImageLayout::eTransferSrcOptimal:
// Image was being used as a copy source, ensure all reads have finished.
info.access = vk::AccessFlagBits::eTransferRead;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eTransferDstOptimal:
// Image was being used as a copy destination, ensure all writes have finished.
info.access = vk::AccessFlagBits::eTransferWrite;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled Vulkan image layout {}", layout);
UNREACHABLE();
}
return info;
};
LayoutInfo source = GetLayoutInfo(layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {aspect, level, level_count, 0, 1}
};
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
layout = new_layout;
}
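// Illustrative sketch (assumed usage, following the transfer functions above):
// copy and blit commands pair a source and a destination transition before the
// transfer is recorded, e.g.:
//
//   source.TransitionLevels(cmdbuf, vk::ImageLayout::eTransferSrcOptimal, level, 1);
//   dest.TransitionLevels(cmdbuf, vk::ImageLayout::eTransferDstOptimal, level, 1);
//   cmdbuf.copyImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
//                    dest.image, vk::ImageLayout::eTransferDstOptimal, regions);
//
// The barrier's access masks and stages come from GetLayoutInfo, so each
// transition waits only on the work that actually touched the image.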
} // namespace Vulkan

View File

@ -0,0 +1,129 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <set>
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
struct StagingData {
vk::Buffer buffer;
std::span<std::byte> mapped{};
u32 buffer_offset = 0;
};
struct ImageAlloc {
vk::Image image;
vk::ImageView image_view;
VmaAllocation allocation;
};
class Instance;
class Surface;
/**
* Provides texture manipulation functions to the rasterizer cache
* Separating this into a class makes it easier to abstract graphics API code
*/
class TextureRuntime {
friend class Surface;
public:
TextureRuntime(const Instance& instance, TaskScheduler& scheduler);
~TextureRuntime() = default;
/// Maps a region of the internal staging buffer of the provided size for pixel uploads/downloads
StagingData FindStaging(u32 size, bool upload);
/// Performs operations that need to be done on every scheduler slot switch
void OnSlotSwitch(u32 new_slot);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value);
/// Copies a rectangle of pixels from the source surface to the destination surface
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
/// Blits (scales) a rectangle of pixels from the source surface to the destination surface
bool BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
private:
/// Allocates a Vulkan image, possibly reusing an existing one from the recycler
ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
/// Returns the current Vulkan instance
const Instance& GetInstance() const {
return instance;
}
/// Returns the current Vulkan scheduler
TaskScheduler& GetScheduler() const {
return scheduler;
}
private:
const Instance& instance;
TaskScheduler& scheduler;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_map<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
};
class Surface : public VideoCore::SurfaceBase<Surface> {
friend class TextureRuntime;
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
~Surface() override = default;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
private:
/// Downloads a scaled surface by first downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
/// Uploads pixel data to scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload);
/// Overrides the image layout of the mip level range
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
/// Transitions the mip level range of the surface to new_layout
void TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count);
private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
vk::Image image{};
vk::ImageView image_view{};
VmaAllocation allocation = nullptr;
vk::Format internal_format = vk::Format::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
};
struct Traits {
using Runtime = TextureRuntime;
using Surface = Surface;
};
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
} // namespace Vulkan

View File

@ -0,0 +1,97 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <unordered_map>
#include <tuple>
#include "video_core/shader/shader.h"
namespace Pica::Shader {
template <typename ShaderType>
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
std::string(*CodeGenerator)(const KeyType&)>
class ShaderCache {
public:
ShaderCache() = default;
~ShaderCache() = default;
/// Returns a shader handle generated from the provided config
template <typename... Args>
auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult<ShaderType> {
auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
std::string code = CodeGenerator(config);
shader = ModuleCompiler(code, args...);
return std::make_pair(shader, code);
}
return std::make_pair(shader, std::nullopt);
}
void Inject(const KeyType& key, ShaderType&& shader) {
shaders.emplace(key, std::move(shader));
}
private:
std::unordered_map<KeyType, ShaderType> shaders;
};
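// Illustrative instantiation (names assumed for the example, not part of this
// commit): a Vulkan fragment shader cache keyed on the PICA fixed-function
// config could be declared as:
//
//   using FragmentShaders =
//       ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
//
// where Compile turns GLSL into a vk::ShaderModule (e.g. via glslang) and
// GenerateFragmentShader emits GLSL source for the given config.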
/**
* This is a cache designed for shaders translated from PICA shaders. The first cache matches the
* config structure like a normal cache does. On cache miss, the second cache matches the generated
* GLSL code. The configuration is like this because there might be leftover code in the PICA shader
 * program buffer from the previous shader, which is hashed into the config, resulting in
 * several different config values for the same shader program.
*/
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
std::optional<std::string>(*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)>
class ShaderDoubleCache {
public:
ShaderDoubleCache() = default;
~ShaderDoubleCache() = default;
template <typename... Args>
auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) -> ShaderCacheResult<ShaderType> {
if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
auto code = CodeGenerator(setup, key);
if (!code) {
shader_map[key] = nullptr;
return std::make_pair(ShaderType{}, std::nullopt);
}
std::string& program = code.value();
auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
shader = ModuleCompiler(program, args...);
}
shader_map[key] = &shader;
return std::make_pair(shader, std::move(program));
} else {
if (map_iter->second == nullptr) {
// A previous attempt to generate code for this key failed; don't dereference it
return std::make_pair(ShaderType{}, std::nullopt);
}
return std::make_pair(*map_iter->second, std::nullopt);
}
}
void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
auto& cached_shader = iter->second;
shader_map.insert_or_assign(key, &cached_shader);
}
private:
std::unordered_map<KeyType, ShaderType*> shader_map;
std::unordered_map<std::string, ShaderType> shader_cache;
};
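// Illustrative usage sketch (assumed caller): vertex shaders translated from
// PICA programs hit the config-keyed map first and fall back to the code-keyed
// cache, so two configs that decompile to identical GLSL share one module:
//
//   auto [shader, code] = double_cache.Get(config, setup, device);
//   if (code) {
//       /* newly generated source, e.g. for disk-cache persistence */
//   }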
} // namespace Pica::Shader