renderer_vulkan: Add experimental Vulkan renderer
* Still extremely WIP and missing the rasterizer/renderer classes
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -61,3 +61,6 @@
|
||||
[submodule "vulkan-headers"]
|
||||
path = externals/vulkan-headers
|
||||
url = https://github.com/KhronosGroup/Vulkan-Headers
|
||||
[submodule "glslang"]
|
||||
path = externals/glslang
|
||||
url = https://github.com/KhronosGroup/glslang
|
||||
|
3
externals/CMakeLists.txt
vendored
3
externals/CMakeLists.txt
vendored
@ -60,6 +60,9 @@ endif()
|
||||
# Glad
|
||||
add_subdirectory(glad)
|
||||
|
||||
# glslang
|
||||
add_subdirectory(glslang)
|
||||
|
||||
# inih
|
||||
add_subdirectory(inih)
|
||||
|
||||
|
1
externals/glslang
vendored
Submodule
1
externals/glslang
vendored
Submodule
Submodule externals/glslang added at c0cf8ad876
@ -299,6 +299,40 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static Frontend::WindowSystemType GetWindowSystemType() {
    // Map the Qt platform plugin name onto the backend's WSI type.
    const QString platform = QGuiApplication::platformName();
    if (platform == QStringLiteral("windows")) {
        return Frontend::WindowSystemType::Windows;
    }
    if (platform == QStringLiteral("xcb")) {
        return Frontend::WindowSystemType::X11;
    }
    if (platform == QStringLiteral("wayland")) {
        return Frontend::WindowSystemType::Wayland;
    }

    // Unrecognized platform plugin; fall back to the Windows WSI type.
    LOG_CRITICAL(Frontend, "Unknown Qt platform!");
    return Frontend::WindowSystemType::Windows;
}
|
||||
|
||||
static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
    // Collect the native display/surface handles the video backend needs.
    // A null window yields a headless-style result (null surface).
    Frontend::EmuWindow::WindowSystemInfo wsi;
    wsi.type = GetWindowSystemType();

    // Our Win32 Qt external doesn't have the private API.
#if defined(WIN32) || defined(__APPLE__)
    wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#else
    QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
    wsi.display_connection = pni->nativeResourceForWindow("display", window);
    if (!window) {
        wsi.render_surface = nullptr;
    } else if (wsi.type == Frontend::WindowSystemType::Wayland) {
        // Wayland exposes the surface through the platform native interface.
        wsi.render_surface = pni->nativeResourceForWindow("surface", window);
    } else {
        wsi.render_surface = reinterpret_cast<void*>(window->winId());
    }
#endif
    // Hidpi scale factor of the surface; 1.0 when there is no window.
    wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;

    return wsi;
}
|
||||
|
||||
GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread)
|
||||
: QWidget(parent_), emu_thread(emu_thread) {
|
||||
|
||||
@ -532,6 +566,9 @@ bool GRenderWindow::InitRenderTarget() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Update the Window System information with the new render target
|
||||
window_info = GetWindowSystemInfo(child_widget->windowHandle());
|
||||
|
||||
child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight);
|
||||
|
||||
layout()->addWidget(child_widget);
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <concepts>
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) {
|
||||
return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||
}
|
||||
|
||||
template <std::integral T>
|
||||
struct IdentityHash {
|
||||
T operator()(const T& value) const {
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
||||
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
|
||||
template <typename T>
|
||||
struct HashableStruct {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <array>
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/formatter.h"
|
||||
|
||||
namespace Log {
|
||||
|
||||
// trims up to and including the last of ../, ..\, src/, src\ in a string
|
||||
@ -102,6 +103,7 @@ enum class Class : ClassType {
|
||||
Render, ///< Emulator video output and hardware acceleration
|
||||
Render_Software, ///< Software renderer backend
|
||||
Render_OpenGL, ///< OpenGL backend
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
Audio, ///< Audio emulation
|
||||
Audio_DSP, ///< The HLE and LLE implementations of the DSP
|
||||
Audio_Sink, ///< Emulator audio output backend
|
||||
|
@ -12,6 +12,15 @@
|
||||
|
||||
namespace Frontend {
|
||||
|
||||
/// Information for the Graphics Backends signifying what type of screen pointer is in
|
||||
/// WindowInformation
|
||||
enum class WindowSystemType {
|
||||
Headless,
|
||||
Windows,
|
||||
X11,
|
||||
Wayland,
|
||||
};
|
||||
|
||||
struct Frame;
|
||||
/**
|
||||
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
|
||||
@ -117,6 +126,23 @@ public:
|
||||
std::pair<unsigned, unsigned> min_client_area_size;
|
||||
};
|
||||
|
||||
/// Data describing host window system information
|
||||
struct WindowSystemInfo {
|
||||
// Window system type. Determines which GL context or Vulkan WSI is used.
|
||||
WindowSystemType type = WindowSystemType::Headless;
|
||||
|
||||
// Connection to a display server. This is used on X11 and Wayland platforms.
|
||||
void* display_connection = nullptr;
|
||||
|
||||
// Render surface. This is a pointer to the native window handle, which depends
|
||||
// on the platform. e.g. HWND for Windows, Window for X11. If the surface is
|
||||
// set to nullptr, the video backend will run in headless mode.
|
||||
void* render_surface = nullptr;
|
||||
|
||||
// Scale of the render surface. For hidpi systems, this will be >1.
|
||||
float render_surface_scale = 1.0f;
|
||||
};
|
||||
|
||||
/// Polls window events
|
||||
virtual void PollEvents() = 0;
|
||||
|
||||
@ -180,6 +206,13 @@ public:
|
||||
config = val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns system information about the drawing area.
|
||||
*/
|
||||
const WindowSystemInfo& GetWindowInfo() const {
|
||||
return window_info;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the framebuffer layout (width, height, and screen regions)
|
||||
* @note This method is thread-safe
|
||||
@ -226,6 +259,8 @@ protected:
|
||||
framebuffer_layout = layout;
|
||||
}
|
||||
|
||||
WindowSystemInfo window_info;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Handler called when the minimal client area was requested to be changed via SetConfig.
|
||||
|
@ -169,7 +169,7 @@ struct Values {
|
||||
u64 init_time;
|
||||
|
||||
// Renderer
|
||||
GraphicsAPI graphics_api = GraphicsAPI::OpenGL;
|
||||
GraphicsAPI graphics_api = GraphicsAPI::Vulkan;
|
||||
bool use_hw_renderer;
|
||||
bool use_hw_shader;
|
||||
bool separable_shader;
|
||||
|
@ -82,9 +82,32 @@ add_library(video_core STATIC
|
||||
#temporary, move these back in alphabetical order before merging
|
||||
renderer_opengl/gl_format_reinterpreter.cpp
|
||||
renderer_opengl/gl_format_reinterpreter.h
|
||||
renderer_vulkan/pica_to_vk.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_instance.cpp
|
||||
renderer_vulkan/vk_instance.h
|
||||
renderer_vulkan/vk_pipeline_cache.cpp
|
||||
renderer_vulkan/vk_pipeline_cache.h
|
||||
renderer_vulkan/vk_platform.h
|
||||
renderer_vulkan/vk_renderpass_cache.cpp
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader.cpp
|
||||
renderer_vulkan/vk_shader.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h
|
||||
renderer_vulkan/vk_swapchain.cpp
|
||||
renderer_vulkan/vk_swapchain.h
|
||||
renderer_vulkan/vk_task_scheduler.cpp
|
||||
renderer_vulkan/vk_task_scheduler.h
|
||||
renderer_vulkan/vk_texture_runtime.cpp
|
||||
renderer_vulkan/vk_texture_runtime.h
|
||||
shader/debug_data.h
|
||||
shader/shader.cpp
|
||||
shader/shader.h
|
||||
shader/shader_cache.h
|
||||
shader/shader_interpreter.cpp
|
||||
shader/shader_interpreter.h
|
||||
swrasterizer/clipper.cpp
|
||||
@ -156,8 +179,11 @@ endif()
|
||||
|
||||
create_target_directory_groups(video_core)
|
||||
|
||||
# Include Vulkan headers
|
||||
target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include)
|
||||
target_include_directories(video_core PRIVATE ../../externals/vma)
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization)
|
||||
target_link_libraries(video_core PRIVATE glad glslang nihstro-headers Boost::serialization)
|
||||
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
|
@ -6,8 +6,7 @@
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/pica_types.h"
|
||||
|
||||
namespace Pica {
|
||||
@ -18,7 +17,7 @@ struct RasterizerRegs {
|
||||
KeepAll = 0,
|
||||
KeepClockWise = 1,
|
||||
KeepCounterClockWise = 2,
|
||||
// TODO: What does the third value imply?
|
||||
KeepAll2 = 3
|
||||
};
|
||||
|
||||
union {
|
||||
|
@ -243,17 +243,12 @@ private:
|
||||
};
|
||||
|
||||
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
|
||||
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
|
||||
const std::string_view version{reinterpret_cast<const char*>(glGetString(GL_VERSION))};
|
||||
|
||||
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||
const u32 dst_index = static_cast<u32>(dest);
|
||||
return reinterpreters[dst_index].push_back(std::move(obj));
|
||||
};
|
||||
|
||||
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
|
||||
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
|
||||
|
||||
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
|
||||
}
|
||||
|
||||
|
@ -302,9 +302,9 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
|
||||
texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
|
||||
MICROPROFILE_SCOPE(OpenGL_Upload);
|
||||
|
||||
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
|
||||
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
|
||||
@ -339,9 +339,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
|
||||
InvalidateAllWatcher();
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64));
|
||||
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
|
||||
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
|
||||
MICROPROFILE_SCOPE(OpenGL_Download);
|
||||
|
||||
// Ensure no bad interactions with GL_PACK_ALIGNMENT
|
||||
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
|
||||
|
278
src/video_core/renderer_vulkan/pica_to_vk.h
Normal file
278
src/video_core/renderer_vulkan/pica_to_vk.h
Normal file
@ -0,0 +1,278 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace PicaToVK {
|
||||
|
||||
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
|
||||
|
||||
struct FilterInfo {
|
||||
vk::Filter mag_filter, min_filter;
|
||||
vk::SamplerMipmapMode mip_mode;
|
||||
};
|
||||
|
||||
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
    // Index 0 = Nearest, 1 = Linear; .at() traps out-of-range register values.
    constexpr std::array filters = {vk::Filter::eNearest, vk::Filter::eLinear};
    constexpr std::array mip_modes = {vk::SamplerMipmapMode::eNearest,
                                      vk::SamplerMipmapMode::eLinear};

    return FilterInfo{filters.at(mag), filters.at(min), mip_modes.at(mip)};
}
|
||||
|
||||
inline vk::Filter TextureFilterMode(TextureFilter mode) {
    // Translate a single PICA filter mode to its Vulkan equivalent.
    if (mode == TextureFilter::Linear) {
        return vk::Filter::eLinear;
    }
    if (mode == TextureFilter::Nearest) {
        return vk::Filter::eNearest;
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode);
    UNIMPLEMENTED();

    // Fallback used after the unimplemented path above.
    return vk::Filter::eLinear;
}
|
||||
|
||||
inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) {
    // Translate a PICA mip filter mode to the Vulkan sampler mipmap mode.
    if (mip == TextureFilter::Linear) {
        return vk::SamplerMipmapMode::eLinear;
    }
    if (mip == TextureFilter::Nearest) {
        return vk::SamplerMipmapMode::eNearest;
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip);
    UNIMPLEMENTED();

    // Fallback used after the unimplemented path above.
    return vk::SamplerMipmapMode::eLinear;
}
|
||||
|
||||
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
    // Indexed by the raw PICA wrap mode value.
    static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
        vk::SamplerAddressMode::eClampToEdge,
        vk::SamplerAddressMode::eClampToBorder,
        vk::SamplerAddressMode::eRepeat,
        vk::SamplerAddressMode::eMirroredRepeat,
        // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
        // comments in enum WrapMode.
        vk::SamplerAddressMode::eClampToEdge,
        vk::SamplerAddressMode::eClampToBorder,
        vk::SamplerAddressMode::eRepeat,
        vk::SamplerAddressMode::eRepeat,
    }};

    const std::size_t idx = static_cast<std::size_t>(mode);
    if (idx >= wrap_mode_table.size()) {
        LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", idx);
        UNREACHABLE();

        return vk::SamplerAddressMode::eClampToEdge;
    }

    // Modes 4-7 are the imperfectly-emulated "2" variants; record their use.
    if (idx > 3) {
        Core::System::GetInstance().TelemetrySession().AddField(
            Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode",
            static_cast<u32>(idx));
        LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", idx);
    }

    return wrap_mode_table[idx];
}
|
||||
|
||||
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
    // Indexed by Pica::FramebufferRegs::BlendEquation, in declaration order.
    static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
        vk::BlendOp::eAdd,
        vk::BlendOp::eSubtract,
        vk::BlendOp::eReverseSubtract,
        vk::BlendOp::eMin,
        vk::BlendOp::eMax,
    }};

    const std::size_t idx = static_cast<std::size_t>(equation);
    if (idx < blend_equation_table.size()) {
        return blend_equation_table[idx];
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", idx);

    // This return value is hwtested, not just a stub
    return vk::BlendOp::eAdd;
}
|
||||
|
||||
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
    // Indexed by Pica::FramebufferRegs::BlendFactor, in declaration order
    // (Zero, One, SourceColor, ..., SourceAlphaSaturate).
    static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
        vk::BlendFactor::eZero,
        vk::BlendFactor::eOne,
        vk::BlendFactor::eSrcColor,
        vk::BlendFactor::eOneMinusSrcColor,
        vk::BlendFactor::eDstColor,
        vk::BlendFactor::eOneMinusDstColor,
        vk::BlendFactor::eSrcAlpha,
        vk::BlendFactor::eOneMinusSrcAlpha,
        vk::BlendFactor::eDstAlpha,
        vk::BlendFactor::eOneMinusDstAlpha,
        vk::BlendFactor::eConstantColor,
        vk::BlendFactor::eOneMinusConstantColor,
        vk::BlendFactor::eConstantAlpha,
        vk::BlendFactor::eOneMinusConstantAlpha,
        vk::BlendFactor::eSrcAlphaSaturate,
    }};

    const std::size_t idx = static_cast<std::size_t>(factor);
    if (idx < blend_func_table.size()) {
        return blend_func_table[idx];
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", idx);
    UNREACHABLE();

    return vk::BlendFactor::eOne;
}
|
||||
|
||||
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
    // Indexed by Pica::FramebufferRegs::LogicOp, in declaration order
    // (Clear, And, AndReverse, Copy, Set, ..., OrInverted).
    static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
        vk::LogicOp::eClear,
        vk::LogicOp::eAnd,
        vk::LogicOp::eAndReverse,
        vk::LogicOp::eCopy,
        vk::LogicOp::eSet,
        vk::LogicOp::eCopyInverted,
        vk::LogicOp::eNoOp,
        vk::LogicOp::eInvert,
        vk::LogicOp::eNand,
        vk::LogicOp::eOr,
        vk::LogicOp::eNor,
        vk::LogicOp::eXor,
        vk::LogicOp::eEquivalent,
        vk::LogicOp::eAndInverted,
        vk::LogicOp::eOrReverse,
        vk::LogicOp::eOrInverted,
    }};

    const std::size_t idx = static_cast<std::size_t>(op);
    if (idx < logic_op_table.size()) {
        return logic_op_table[idx];
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", idx);
    UNREACHABLE();

    return vk::LogicOp::eCopy;
}
|
||||
|
||||
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
    // Indexed by Pica::FramebufferRegs::CompareFunc, in declaration order
    // (Never, Always, Equal, NotEqual, LessThan, ..., GreaterThanOrEqual).
    static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
        vk::CompareOp::eNever,
        vk::CompareOp::eAlways,
        vk::CompareOp::eEqual,
        vk::CompareOp::eNotEqual,
        vk::CompareOp::eLess,
        vk::CompareOp::eLessOrEqual,
        vk::CompareOp::eGreater,
        vk::CompareOp::eGreaterOrEqual,
    }};

    const std::size_t idx = static_cast<std::size_t>(func);
    if (idx < compare_func_table.size()) {
        return compare_func_table[idx];
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", idx);
    UNREACHABLE();

    return vk::CompareOp::eAlways;
}
|
||||
|
||||
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
    // Indexed by Pica::FramebufferRegs::StencilAction, in declaration order
    // (Keep, Zero, Replace, Increment, Decrement, Invert, IncrementWrap, DecrementWrap).
    static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
        vk::StencilOp::eKeep,
        vk::StencilOp::eZero,
        vk::StencilOp::eReplace,
        vk::StencilOp::eIncrementAndClamp,
        vk::StencilOp::eDecrementAndClamp,
        vk::StencilOp::eInvert,
        vk::StencilOp::eIncrementAndWrap,
        vk::StencilOp::eDecrementAndWrap,
    }};

    const std::size_t idx = static_cast<std::size_t>(action);
    if (idx < stencil_op_table.size()) {
        return stencil_op_table[idx];
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", idx);
    UNREACHABLE();

    return vk::StencilOp::eKeep;
}
|
||||
|
||||
/// Translates a PICA triangle topology to the Vulkan primitive topology.
inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) {
    switch (topology) {
    case Pica::PipelineRegs::TriangleTopology::Fan:
        return vk::PrimitiveTopology::eTriangleFan;
    case Pica::PipelineRegs::TriangleTopology::List:
    case Pica::PipelineRegs::TriangleTopology::Shader:
        return vk::PrimitiveTopology::eTriangleList;
    case Pica::PipelineRegs::TriangleTopology::Strip:
        return vk::PrimitiveTopology::eTriangleStrip;
    }

    // The register value comes from emulated software; falling off the end of a
    // non-void function on an out-of-range value would be undefined behavior.
    LOG_CRITICAL(Render_Vulkan, "Unknown triangle topology {}", static_cast<u32>(topology));
    UNREACHABLE();
    return vk::PrimitiveTopology::eTriangleList;
}
|
||||
|
||||
/// Translates a PICA cull mode to the Vulkan cull mode flags.
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
    switch (mode) {
    case Pica::RasterizerRegs::CullMode::KeepAll:
    case Pica::RasterizerRegs::CullMode::KeepAll2:
        return vk::CullModeFlagBits::eNone;
    case Pica::RasterizerRegs::CullMode::KeepClockWise:
    case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
        return vk::CullModeFlagBits::eBack;
    }

    // The register value comes from emulated software; falling off the end of a
    // non-void function on an out-of-range value would be undefined behavior.
    LOG_CRITICAL(Render_Vulkan, "Unknown cull mode {}", static_cast<u32>(mode));
    UNREACHABLE();
    return vk::CullModeFlagBits::eNone;
}
|
||||
|
||||
/// Derives the Vulkan front-face winding from the PICA cull mode.
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
    switch (mode) {
    case Pica::RasterizerRegs::CullMode::KeepAll:
    case Pica::RasterizerRegs::CullMode::KeepAll2:
    case Pica::RasterizerRegs::CullMode::KeepClockWise:
        return vk::FrontFace::eCounterClockwise;
    case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
        return vk::FrontFace::eClockwise;
    }

    // The register value comes from emulated software; falling off the end of a
    // non-void function on an out-of-range value would be undefined behavior.
    LOG_CRITICAL(Render_Vulkan, "Unknown cull mode {}", static_cast<u32>(mode));
    UNREACHABLE();
    return vk::FrontFace::eCounterClockwise;
}
|
||||
|
||||
inline Common::Vec4f ColorRGBA8(const u32 color) {
    // Unpack the four packed 8-bit channels (R in the low byte) and
    // normalize each to [0, 1].
    const u32 r = (color >> 0) & 0xFF;
    const u32 g = (color >> 8) & 0xFF;
    const u32 b = (color >> 16) & 0xFF;
    const u32 a = (color >> 24) & 0xFF;
    return Common::Vec4u{r, g, b, a} / 255.0f;
}
|
||||
|
||||
inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
    // Normalize the 8-bit register channels to a [0, 1] float vector.
    const Common::Vec3u rgb{color.r, color.g, color.b};
    return rgb / 255.0f;
}
|
||||
|
||||
} // namespace PicaToVK
|
9
src/video_core/renderer_vulkan/vk_common.cpp
Normal file
9
src/video_core/renderer_vulkan/vk_common.cpp
Normal file
@ -0,0 +1,9 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VMA_IMPLEMENTATION
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
// Store the dispatch loader here
|
||||
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
66
src/video_core/renderer_vulkan/vk_common.h
Normal file
66
src/video_core/renderer_vulkan/vk_common.h
Normal file
@ -0,0 +1,66 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_NO_PROTOTYPES 1
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
// Include Vulkan memory allocator
|
||||
#define VMA_STATIC_VULKAN_FUNCTIONS 0
|
||||
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
|
||||
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Return the image aspect associated on the provided format
|
||||
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eX8D24UnormPack32:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
case vk::Format::eD16Unorm:
|
||||
case vk::Format::eD32Sfloat:
|
||||
return vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
default:
|
||||
return vk::ImageAspectFlagBits::eColor;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a bit mask with the required usage of a format with a particular aspect
|
||||
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::ImageUsageFlagBits::eSampled |
|
||||
vk::ImageUsageFlagBits::eTransferDst |
|
||||
vk::ImageUsageFlagBits::eTransferSrc;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::ImageUsageFlagBits::eColorAttachment;
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns a bit mask with the required features of a format with a particular aspect
|
||||
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
|
||||
vk::FormatFeatureFlagBits::eTransferDst |
|
||||
vk::FormatFeatureFlagBits::eTransferSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitDst;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::FormatFeatureFlagBits::eColorAttachment;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
292
src/video_core/renderer_vulkan/vk_instance.cpp
Normal file
292
src/video_core/renderer_vulkan/vk_instance.cpp
Normal file
@ -0,0 +1,292 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <span>
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
Instance::Instance(Frontend::EmuWindow& window) {
    const auto window_info = window.GetWindowInfo();

    // Load the instance-independent entry points before any other Vulkan call.
    vk::DynamicLoader dl;
    auto vkGetInstanceProcAddr =
        dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
    VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);

    // Instance extensions required by the window system in use.
    auto extensions = GetInstanceExtensions(window_info.type, true);

    // We require a Vulkan 1.1 driver.
    const u32 available_version = vk::enumerateInstanceVersion();
    if (available_version < VK_API_VERSION_1_1) {
        LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
    }

    const vk::ApplicationInfo application_info = {
        .pApplicationName = "Citra",
        .applicationVersion = VK_MAKE_VERSION(1, 0, 0),
        .pEngineName = "Citra Vulkan",
        .engineVersion = VK_MAKE_VERSION(1, 0, 0),
        .apiVersion = available_version
    };

    // NOTE(review): the validation layer is enabled unconditionally here —
    // presumably this should be gated on a debug setting; confirm before release.
    const std::array layers = {"VK_LAYER_KHRONOS_validation"};
    const vk::InstanceCreateInfo instance_info = {
        .pApplicationInfo = &application_info,
        .enabledLayerCount = static_cast<u32>(layers.size()),
        .ppEnabledLayerNames = layers.data(),
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data()
    };

    // Create the instance, hook up the dynamic dispatcher and the WSI surface.
    instance = vk::createInstance(instance_info);
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);
    surface = CreateSurface(instance, window);

    // TODO: GPU select dialog
    physical_device = instance.enumeratePhysicalDevices()[0];
    device_limits = physical_device.getProperties().limits;

    // Create logical device
    CreateDevice(true);
}
|
||||
|
||||
Instance::~Instance() {
    // Wait for the GPU to finish all submitted work before tearing down.
    device.waitIdle();

    // Destroy objects in reverse order of creation.
    vmaDestroyAllocator(allocator);
    device.destroy();
    instance.destroySurfaceKHR(surface);
    instance.destroy();
}
|
||||
|
||||
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
    // Per-format property cache so the driver isn't queried repeatedly.
    // NOTE(review): this function-local static is shared by all Instance objects
    // and is not synchronized — confirm single-threaded use is intended.
    static std::unordered_map<vk::Format, vk::FormatProperties> supported;

    auto it = supported.find(format);
    if (it == supported.end()) {
        it = supported.emplace(format, physical_device.getFormatProperties(format)).first;
    }

    // Supported only when every requested feature bit is present.
    return (it->second.optimalTilingFeatures & usage) == usage;
}
|
||||
|
||||
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
    // If the driver supports the format with all required features, keep it.
    const vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
    if (IsFormatSupported(format, features)) {
        return format;
    }

    // Otherwise pick the closest mandatory format, preferably one with the
    // same block size. See 43.3. Required Format Support of the Vulkan spec.
    switch (format) {
    case vk::Format::eD24UnormS8Uint:
        return vk::Format::eD32SfloatS8Uint;
    case vk::Format::eX8D24UnormPack32:
        return vk::Format::eD32Sfloat;
    case vk::Format::eR5G5B5A1UnormPack16:
        return vk::Format::eA1R5G5B5UnormPack16;
    case vk::Format::eR8G8B8Unorm:
        return vk::Format::eR8G8B8A8Unorm;
    case vk::Format::eUndefined:
        return vk::Format::eUndefined;
    case vk::Format::eR4G4B4A4UnormPack16:
        // B4G4R4A4 is not guaranteed by the spec to support attachments
        return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
    default:
        LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}",
                    vk::to_string(format), vk::to_string(features));
        return vk::Format::eR8G8B8A8Unorm;
    }
}
|
||||
|
||||
bool Instance::CreateDevice(bool validation_enabled) {
|
||||
// Determine required extensions and features
|
||||
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
|
||||
vk::PhysicalDeviceDynamicRenderingFeaturesKHR,
|
||||
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
|
||||
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
|
||||
|
||||
// Not having geometry shaders or wide lines will cause issues with rendering.
|
||||
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
|
||||
if (!available.geometryShader && !available.wideLines) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
|
||||
}
|
||||
|
||||
// Enable some common features other emulators like Dolphin use
|
||||
const vk::PhysicalDeviceFeatures2 features = {
|
||||
.features = {
|
||||
.robustBufferAccess = available.robustBufferAccess,
|
||||
.geometryShader = available.geometryShader,
|
||||
.sampleRateShading = available.sampleRateShading,
|
||||
.dualSrcBlend = available.dualSrcBlend,
|
||||
.logicOp = available.logicOp,
|
||||
.depthClamp = available.depthClamp,
|
||||
.largePoints = available.largePoints,
|
||||
.samplerAnisotropy = available.samplerAnisotropy,
|
||||
.occlusionQueryPrecise = available.occlusionQueryPrecise,
|
||||
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
|
||||
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = available.shaderClipDistance
|
||||
}
|
||||
};
|
||||
|
||||
// Enable newer Vulkan features
|
||||
auto enabled_features = vk::StructureChain{
|
||||
features,
|
||||
//feature_chain.get<vk::PhysicalDeviceDynamicRenderingFeaturesKHR>(),
|
||||
//feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
|
||||
//feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
|
||||
};
|
||||
|
||||
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
|
||||
if (extension_list.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// List available device extensions
|
||||
for (const auto& extension : extension_list) {
|
||||
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName);
|
||||
}
|
||||
|
||||
// Helper lambda for adding extensions
|
||||
std::array<const char*, 6> enabled_extensions;
|
||||
u32 enabled_extension_count = 0;
|
||||
|
||||
auto AddExtension = [&](std::string_view name, bool required) -> bool {
|
||||
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
|
||||
return name.compare(prop.extensionName.data());
|
||||
});
|
||||
|
||||
if (result != extension_list.end()) {
|
||||
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
|
||||
enabled_extensions[enabled_extension_count++] = name.data();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (required) {
|
||||
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
// Add required extensions
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
|
||||
|
||||
// Check for optional features
|
||||
//dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
|
||||
//extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
|
||||
//push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
|
||||
|
||||
// Search queue families for graphics and present queues
|
||||
auto family_properties = physical_device.getQueueFamilyProperties();
|
||||
if (family_properties.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
graphics_queue_family_index = -1;
|
||||
present_queue_family_index = -1;
|
||||
for (int i = 0; i < family_properties.size(); i++) {
|
||||
// Check if queue supports graphics
|
||||
if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
|
||||
graphics_queue_family_index = i;
|
||||
|
||||
// If this queue also supports presentation we are finished
|
||||
if (physical_device.getSurfaceSupportKHR(i, surface)) {
|
||||
present_queue_family_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if queue supports presentation
|
||||
if (physical_device.getSurfaceSupportKHR(i, surface)) {
|
||||
present_queue_family_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr float queue_priorities[] = {1.0f};
|
||||
|
||||
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
|
||||
const std::array queue_infos = {
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = graphics_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
},
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = present_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
}
|
||||
};
|
||||
|
||||
vk::DeviceCreateInfo device_info = {
|
||||
.pNext = &features, // TODO: Change this
|
||||
.queueCreateInfoCount = 1,
|
||||
.pQueueCreateInfos = queue_infos.data(),
|
||||
.enabledExtensionCount = enabled_extension_count,
|
||||
.ppEnabledExtensionNames = enabled_extensions.data(),
|
||||
};
|
||||
|
||||
if (graphics_queue_family_index != present_queue_family_index) {
|
||||
device_info.queueCreateInfoCount = 2;
|
||||
}
|
||||
|
||||
// Enable debug layer on debug builds
|
||||
if (validation_enabled) {
|
||||
device_info.enabledLayerCount = static_cast<u32>(layers.size());
|
||||
device_info.ppEnabledLayerNames = layers.data();
|
||||
}
|
||||
|
||||
// Create logical device
|
||||
device = physical_device.createDevice(device_info);
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(device);
|
||||
|
||||
// Grab the graphics and present queues.
|
||||
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
|
||||
present_queue = device.getQueue(present_queue_family_index, 0);
|
||||
|
||||
// Create the VMA allocator
|
||||
CreateAllocator();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Instance::CreateAllocator() {
|
||||
VmaVulkanFunctions functions = {
|
||||
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
|
||||
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
|
||||
};
|
||||
|
||||
VmaAllocatorCreateInfo allocator_info = {
|
||||
.physicalDevice = physical_device,
|
||||
.device = device,
|
||||
.pVulkanFunctions = &functions,
|
||||
.instance = instance,
|
||||
.vulkanApiVersion = VK_API_VERSION_1_1
|
||||
};
|
||||
|
||||
if (auto result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result);
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
129
src/video_core/renderer_vulkan/vk_instance.h
Normal file
129
src/video_core/renderer_vulkan/vk_instance.h
Normal file
@ -0,0 +1,129 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// The global Vulkan instance
|
||||
class Instance {
|
||||
public:
|
||||
Instance(Frontend::EmuWindow& window);
|
||||
~Instance();
|
||||
|
||||
/// Returns true when the format supports the provided feature flags
|
||||
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
|
||||
|
||||
/// Returns the most compatible format that supports the provided feature flags
|
||||
vk::Format GetFormatAlternative(vk::Format format) const;
|
||||
|
||||
/// Returns the Vulkan instance
|
||||
vk::Instance GetInstance() const {
|
||||
return instance;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan surface
|
||||
vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
/// Returns the current physical device
|
||||
vk::PhysicalDevice GetPhysicalDevice() const {
|
||||
return physical_device;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan device
|
||||
vk::Device GetDevice() const {
|
||||
return device;
|
||||
}
|
||||
|
||||
VmaAllocator GetAllocator() const {
|
||||
return allocator;
|
||||
}
|
||||
|
||||
/// Retrieve queue information
|
||||
u32 GetGraphicsQueueFamilyIndex() const {
|
||||
return graphics_queue_family_index;
|
||||
}
|
||||
|
||||
u32 GetPresentQueueFamilyIndex() const {
|
||||
return present_queue_family_index;
|
||||
}
|
||||
|
||||
vk::Queue GetGraphicsQueue() const {
|
||||
return graphics_queue;
|
||||
}
|
||||
|
||||
vk::Queue GetPresentQueue() const {
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Feature support
|
||||
bool IsDynamicRenderingSupported() const {
|
||||
return dynamic_rendering;
|
||||
}
|
||||
|
||||
bool IsExtendedDynamicStateSupported() const {
|
||||
// TODO: Enable this when the pipeline builder is confirmed functional
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsPushDescriptorsSupported() const {
|
||||
return push_descriptors;
|
||||
}
|
||||
|
||||
/// Returns the vendor ID of the physical device
|
||||
u32 GetVendorID() const {
|
||||
return device_properties.vendorID;
|
||||
}
|
||||
|
||||
/// Returns the device ID of the physical device
|
||||
u32 GetDeviceID() const {
|
||||
return device_properties.deviceID;
|
||||
}
|
||||
|
||||
/// Returns the pipeline cache unique identifier
|
||||
const auto GetPipelineCacheUUID() const {
|
||||
return device_properties.pipelineCacheUUID;
|
||||
}
|
||||
|
||||
/// Returns the minimum required alignment for uniforms
|
||||
vk::DeviceSize UniformMinAlignment() const {
|
||||
return device_limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
private:
|
||||
bool CreateDevice(bool validation_enabled);
|
||||
void CreateAllocator();
|
||||
|
||||
private:
|
||||
// Queue family indexes
|
||||
u32 present_queue_family_index = 0;
|
||||
u32 graphics_queue_family_index = 0;
|
||||
vk::Queue present_queue, graphics_queue;
|
||||
|
||||
// Core vulkan objects
|
||||
vk::Device device;
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::Instance instance;
|
||||
vk::SurfaceKHR surface;
|
||||
vk::PhysicalDeviceLimits device_limits;
|
||||
vk::PhysicalDeviceProperties device_properties;
|
||||
VmaAllocator allocator;
|
||||
|
||||
// Features per vulkan version
|
||||
bool dynamic_rendering = false;
|
||||
bool extended_dynamic_state = false;
|
||||
bool push_descriptors = false;
|
||||
};
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
646
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
646
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
@ -0,0 +1,646 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/common_paths.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Describes the layout of one descriptor set: the descriptor type occupying
/// each binding slot and how many of the slots are actually used.
struct Bindings {
    std::array<vk::DescriptorType, MAX_DESCRIPTORS> bindings; // Descriptor type per binding index
    u32 binding_count;                                        // Number of valid entries in `bindings`
};
|
||||
|
||||
// Number of descriptor sets used by the rasterizer; must match the entries below.
constexpr u32 RASTERIZER_SET_COUNT = 4;
// Static descriptor layout of the four rasterizer sets, in set-index order:
// 0 = uniform/texel buffers, 1 = sampled textures, 2 = samplers, 3 = shadow images.
constexpr static std::array RASTERIZER_SETS = {
    Bindings{
        // Utility set
        .bindings = {
            vk::DescriptorType::eUniformBuffer,
            vk::DescriptorType::eUniformBuffer,
            vk::DescriptorType::eUniformTexelBuffer,
            vk::DescriptorType::eUniformTexelBuffer,
            vk::DescriptorType::eUniformTexelBuffer
        },
        .binding_count = 5
    },
    Bindings{
        // Texture set
        .bindings = {
            vk::DescriptorType::eSampledImage,
            vk::DescriptorType::eSampledImage,
            vk::DescriptorType::eSampledImage,
            vk::DescriptorType::eSampledImage
        },
        .binding_count = 4
    },
    Bindings{
        // Sampler set
        .bindings = {
            vk::DescriptorType::eSampler,
            vk::DescriptorType::eSampler,
            vk::DescriptorType::eSampler,
            vk::DescriptorType::eSampler
        },
        .binding_count = 4
    },
    Bindings {
        // Shadow set
        .bindings = {
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage,
            vk::DescriptorType::eStorageImage
        },
        .binding_count = 7
    }
};
|
||||
|
||||
/// Maps a descriptor type to the shader stages that may access it.
/// Image/sampler/texel-buffer descriptors are fragment-only; uniform buffers are
/// visible to all stages the renderer uses. Unknown types yield empty flags.
constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
    switch (type) {
    case vk::DescriptorType::eSampler:
    case vk::DescriptorType::eSampledImage:
    case vk::DescriptorType::eUniformTexelBuffer:
    case vk::DescriptorType::eStorageImage:
        return vk::ShaderStageFlagBits::eFragment;
    case vk::DescriptorType::eUniformBuffer:
    case vk::DescriptorType::eUniformBufferDynamic:
        return vk::ShaderStageFlagBits::eFragment |
               vk::ShaderStageFlagBits::eVertex |
               vk::ShaderStageFlagBits::eGeometry |
               vk::ShaderStageFlagBits::eCompute;
    default:
        LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
        return vk::ShaderStageFlags{};
    }
}
|
||||
|
||||
/// Returns the size in bytes of a vertex attribute (component size * component count).
/// NOTE: The previous version had no default case, so an out-of-range enum value
/// fell off the end of the function (undefined behaviour).
u32 AttribBytes(VertexAttribute attrib) {
    switch (attrib.type) {
    case AttribType::Float:
        return sizeof(float) * attrib.size;
    case AttribType::Int:
        return sizeof(u32) * attrib.size;
    case AttribType::Short:
        return sizeof(u16) * attrib.size;
    case AttribType::Byte:
    case AttribType::Ubyte:
        return sizeof(u8) * attrib.size;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!");
        UNREACHABLE();
        return 0;
    }
}
|
||||
|
||||
/// Converts a vertex attribute description to the matching Vulkan format.
/// Only float attributes of 1-4 components are implemented; anything else logs
/// a critical error (matching the original fallthrough-to-default behaviour)
/// and falls back to eR32Sfloat.
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
    if (attrib.type == AttribType::Float) {
        switch (attrib.size) {
        case 1: return vk::Format::eR32Sfloat;
        case 2: return vk::Format::eR32G32Sfloat;
        case 3: return vk::Format::eR32G32B32Sfloat;
        case 4: return vk::Format::eR32G32B32A32Sfloat;
        }
    }

    LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
    UNREACHABLE();
    return vk::Format::eR32Sfloat;
}
|
||||
|
||||
/// Maps a slot index in the current_shaders array to its pipeline stage:
/// 0 = vertex, 1 = fragment, 2 = geometry. Other indices are a programmer error.
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
    if (index == 0) {
        return vk::ShaderStageFlagBits::eVertex;
    }
    if (index == 1) {
        return vk::ShaderStageFlagBits::eFragment;
    }
    if (index == 2) {
        return vk::ShaderStageFlagBits::eGeometry;
    }

    LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
    UNREACHABLE();
    return vk::ShaderStageFlagBits::eVertex;
}
|
||||
|
||||
/// Constructs the pipeline cache. All descriptor sets start dirty so the first
/// BindDescriptorSets() allocates and writes fresh sets, and any previously
/// saved pipeline cache is loaded from disk.
PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache)
    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
    descriptor_dirty.fill(true);

    LoadDiskCache();
}
|
||||
|
||||
/// Persists the pipeline cache to disk and destroys all owned Vulkan objects.
PipelineCache::~PipelineCache() {
    vk::Device device = instance.GetDevice();

    // Serialize before destroying anything the cache refers to.
    SaveDiskCache();

    device.destroyPipelineLayout(layout);
    // NOTE: The previous version leaked the vk::PipelineCache handle.
    device.destroyPipelineCache(pipeline_cache);
    for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
        device.destroyDescriptorSetLayout(descriptor_set_layouts[i]);
        device.destroyDescriptorUpdateTemplate(update_templates[i]);
    }

    for (const auto& [hash, pipeline] : graphics_pipelines) {
        device.destroyPipeline(pipeline);
    }

    graphics_pipelines.clear();
}
|
||||
|
||||
/// Looks up (or builds) the graphics pipeline matching `info` plus the currently
/// bound shaders, binds it, and rebinds any dirty descriptor sets.
void PipelineCache::BindPipeline(const PipelineInfo& info) {
    // Stencil masks/reference (and extended dynamic state, when supported) are
    // set dynamically so they do not need to participate in the pipeline hash.
    ApplyDynamic(info);

    // Fold the hashes of all currently bound shader stages together.
    u64 shader_hash = 0;
    for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
        shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
    }

    // Hash only the prefix of PipelineInfo that is baked into the pipeline:
    // with extended dynamic state, everything from `rasterization` onward is
    // dynamic; otherwise hash up to (but excluding) stencil_reference.
    // NOTE(review): this assumes a specific member layout of PipelineInfo /
    // DepthStencilState (hashable fields first, contiguous, no padding in the
    // hashed prefix) — verify against the struct definitions.
    const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ?
            offsetof(PipelineInfo, rasterization) :
            offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference);

    u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
    u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);

    // try_emplace avoids a second lookup: build the pipeline only on first use.
    auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
    if (new_pipeline) {
        it->second = BuildPipeline(info);
    }

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);

    BindDescriptorSets();
}
|
||||
|
||||
/// Compiles (or fetches from cache) the programmable vertex shader for the
/// current PICA register state and makes it the active vertex stage.
/// Returns false when the shader could not be generated.
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) {
    const PicaVSConfig config{regs.vs, setup};
    auto [module, compile_result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
                                                                    instance.GetDevice(), ShaderOptimization::Debug);
    if (!module) {
        return false;
    }

    shader_hashes[ProgramType::VS] = config.Hash();
    current_shaders[ProgramType::VS] = module;
    return true;
}
|
||||
|
||||
/// Makes the passthrough vertex shader the active vertex stage
/// (hash 0 marks the trivial shader in the pipeline key).
void PipelineCache::UseTrivialVertexShader() {
    shader_hashes[ProgramType::VS] = 0;
    current_shaders[ProgramType::VS] = trivial_vertex_shader;
}
|
||||
|
||||
/// Activates the fixed-function-emulating geometry shader derived from the
/// current PICA register state.
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
    const PicaFixedGSConfig gs_config{regs};
    auto [module, compile_result] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
                                                               instance.GetDevice(), ShaderOptimization::Debug);
    shader_hashes[ProgramType::GS] = gs_config.Hash();
    current_shaders[ProgramType::GS] = module;
}
|
||||
|
||||
/// Disables the geometry stage entirely; a null module is skipped when the
/// pipeline's shader stages are assembled.
void PipelineCache::UseTrivialGeometryShader() {
    shader_hashes[ProgramType::GS] = 0;
    current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
}
|
||||
|
||||
/// Activates the fragment shader generated for the current PICA register state.
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
    auto [module, compile_result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
                                                         instance.GetDevice(), ShaderOptimization::Debug);
    shader_hashes[ProgramType::FS] = config.Hash();
    current_shaders[ProgramType::FS] = module;
}
|
||||
|
||||
/// Stages an image view (in shader-read-only layout) at the given set/binding;
/// the descriptor set is rewritten lazily on the next BindDescriptorSets().
void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_view) {
    SetBinding(set, descriptor, DescriptorData{
        .image_info = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
        }
    });
}
|
||||
|
||||
/// Stages a buffer range at the given set/binding for the next descriptor update.
void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 offset, u32 size) {
    SetBinding(set, descriptor, DescriptorData{
        .buffer_info = vk::DescriptorBufferInfo{
            .buffer = buffer,
            .offset = offset,
            .range = size
        }
    });
}
|
||||
|
||||
/// Stages a texel buffer view at the given set/binding for the next descriptor update.
void PipelineCache::BindTexelBuffer(u32 set, u32 descriptor, vk::BufferView buffer_view) {
    SetBinding(set, descriptor, DescriptorData{
        .buffer_view = buffer_view
    });
}
|
||||
|
||||
/// Stages a sampler at the given set/binding for the next descriptor update.
void PipelineCache::BindSampler(u32 set, u32 descriptor, vk::Sampler sampler) {
    SetBinding(set, descriptor, DescriptorData{
        .image_info = vk::DescriptorImageInfo{
            .sampler = sampler
        }
    });
}
|
||||
|
||||
void PipelineCache::SetViewport(float x, float y, float width, float height) {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f});
|
||||
}
|
||||
|
||||
/// Records a dynamic scissor rectangle on the render command buffer.
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
    const vk::Rect2D scissor{{x, y}, {width, height}};
    scheduler.GetRenderCommandBuffer().setScissor(0, scissor);
}
|
||||
|
||||
/// Forces every descriptor set to be re-allocated and re-written on the next
/// BindDescriptorSets() call (e.g. after the descriptor pool is reset).
void PipelineCache::MarkDescriptorSetsDirty() {
    for (auto& dirty : descriptor_dirty) {
        dirty = true;
    }
}
|
||||
|
||||
/// Records all dynamic pipeline state from `info` into the render command buffer.
/// Stencil masks/reference are always dynamic; the remaining state is only
/// dynamic when VK_EXT_extended_dynamic_state is enabled (otherwise it is baked
/// into the pipeline by BuildPipeline).
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_compare_mask);
    command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_write_mask);
    command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_reference);

    if (instance.IsExtendedDynamicStateSupported()) {
        command_buffer.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode));
        command_buffer.setDepthCompareOpEXT(PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op));
        command_buffer.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable);
        command_buffer.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable);
        // Front face is derived from the PICA cull mode, mirroring BuildPipeline's
        // static rasterization state.
        command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode));
        command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology));
        command_buffer.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable);
        command_buffer.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack,
                                       PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
                                       PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
                                       PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
                                       PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op));
    }
}
|
||||
|
||||
/// Writes `data` into the shadow copy of the descriptor set and marks the set
/// dirty only when the binding actually changed, avoiding redundant set updates.
void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) {
    auto& slot = update_data[set][binding];
    if (slot != data) {
        slot = data;
        descriptor_dirty[set] = true;
    }
}
|
||||
|
||||
/// Builds the descriptor set layouts, descriptor update templates and the single
/// pipeline layout used by the rasterizer, from the static RASTERIZER_SETS table.
void PipelineCache::BuildLayout() {
    // Scratch arrays reused for every set; only the first binding_count entries
    // are valid per iteration.
    std::array<vk::DescriptorSetLayoutBinding, MAX_DESCRIPTORS> set_bindings;
    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;

    vk::Device device = instance.GetDevice();
    for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
        const auto& set = RASTERIZER_SETS[i];
        for (u32 j = 0; j < set.binding_count; j++) {
            vk::DescriptorType type = set.bindings[j];
            set_bindings[j] = vk::DescriptorSetLayoutBinding{
                .binding = j,
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = ToVkStageFlags(type)
            };

            // The template reads descriptor j from update_data at a fixed stride of
            // sizeof(DescriptorData) per binding; stride 0 is fine for single-descriptor
            // entries. NOTE(review): this assumes update_data rows are contiguous
            // DescriptorData arrays — confirm against the member declaration.
            update_entries[j] = vk::DescriptorUpdateTemplateEntry{
                .dstBinding = j,
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = type,
                .offset = j * sizeof(DescriptorData),
                .stride = 0
            };
        }

        const vk::DescriptorSetLayoutCreateInfo layout_info = {
            .bindingCount = set.binding_count,
            .pBindings = set_bindings.data()
        };

        // Create descriptor set layout
        descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info);

        const vk::DescriptorUpdateTemplateCreateInfo template_info = {
            .descriptorUpdateEntryCount = set.binding_count,
            .pDescriptorUpdateEntries = update_entries.data(),
            .descriptorSetLayout = descriptor_set_layouts[i]
        };

        // Create descriptor set update template
        update_templates[i] = device.createDescriptorUpdateTemplate(template_info);
    }

    // One pipeline layout covering all rasterizer sets; no push constants are used.
    const vk::PipelineLayoutCreateInfo layout_info = {
        .setLayoutCount = RASTERIZER_SET_COUNT,
        .pSetLayouts = descriptor_set_layouts.data(),
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr
    };

    layout = device.createPipelineLayout(layout_info);
}
|
||||
|
||||
/// Builds a graphics pipeline for `info` using the currently bound shader stages.
/// Called by BindPipeline on a pipeline-key cache miss.
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
    vk::Device device = instance.GetDevice();

    // Pack the active shader stages contiguously and count them.
    // NOTE: The previous code wrote stages at the *stage* index (leaving gaps when
    // a stage was null) and never incremented shader_count, so stageCount was
    // always 0 — an invalid pipeline.
    u32 shader_count = 0;
    std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
    for (std::size_t i = 0; i < current_shaders.size(); i++) {
        vk::ShaderModule shader = current_shaders[i];
        if (!shader) {
            continue;
        }

        shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
            .stage = ToVkShaderStage(i),
            .module = shader,
            .pName = "main"
        };
    }

    /**
     * Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and increasing
     * data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate.
     * Since one instance is all we render, the shader will always read the single attribute.
     */
    std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
    for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
        const auto& binding = info.vertex_layout.bindings[i];
        bindings[i] = vk::VertexInputBindingDescription{
            .binding = binding.binding,
            .stride = binding.stride,
            .inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
                                               : vk::VertexInputRate::eVertex
        };
    }

    // Populate vertex attribute structures
    std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
    for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
        const auto& attr = info.vertex_layout.attributes[i];
        attributes[i] = vk::VertexInputAttributeDescription{
            .location = attr.location,
            .binding = attr.binding,
            .format = ToVkAttributeFormat(attr),
            .offset = attr.offset
        };
    }

    const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
        .vertexBindingDescriptionCount = info.vertex_layout.binding_count,
        .pVertexBindingDescriptions = bindings.data(),
        .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
        .pVertexAttributeDescriptions = attributes.data()
    };

    const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
        .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
        .primitiveRestartEnable = false
    };

    const vk::PipelineRasterizationStateCreateInfo raster_state = {
        .depthClampEnable = false,
        .rasterizerDiscardEnable = false,
        .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
        .frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
        .depthBiasEnable = false,
        .lineWidth = 1.0f
    };

    const vk::PipelineMultisampleStateCreateInfo multisampling = {
        .rasterizationSamples = vk::SampleCountFlagBits::e1,
        .sampleShadingEnable = false
    };

    const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
        .blendEnable = info.blending.blend_enable.Value(),
        .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
        .dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
        .colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
        .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
        .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
        .alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
        .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
                          vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
    };

    const vk::PipelineColorBlendStateCreateInfo color_blending = {
        .logicOpEnable = info.blending.logic_op_enable.Value(),
        .logicOp = PicaToVK::LogicOp(info.blending.logic_op),
        .attachmentCount = 1,
        .pAttachments = &colorblend_attachment,
        .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
    };

    // Viewport/scissor are always dynamic; these placeholders only satisfy the API.
    const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
    const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}};
    const vk::PipelineViewportStateCreateInfo viewport_info = {
        .viewportCount = 1,
        .pViewports = &placeholder_viewport,
        .scissorCount = 1,
        .pScissors = &placeholder_scissor,
    };

    // The first 6 states are always dynamic; the remaining 8 require
    // VK_EXT_extended_dynamic_state.
    const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
    const std::array dynamic_states = {
        vk::DynamicState::eViewport,
        vk::DynamicState::eScissor,
        vk::DynamicState::eLineWidth,
        vk::DynamicState::eStencilCompareMask,
        vk::DynamicState::eStencilWriteMask,
        vk::DynamicState::eStencilReference,
        // VK_EXT_extended_dynamic_state
        vk::DynamicState::eCullModeEXT,
        vk::DynamicState::eDepthCompareOpEXT,
        vk::DynamicState::eDepthTestEnableEXT,
        vk::DynamicState::eDepthWriteEnableEXT,
        vk::DynamicState::eFrontFaceEXT,
        vk::DynamicState::ePrimitiveTopologyEXT,
        vk::DynamicState::eStencilOpEXT,
        vk::DynamicState::eStencilTestEnableEXT,
    };

    const vk::PipelineDynamicStateCreateInfo dynamic_info = {
        .dynamicStateCount = extended_dynamic_states ? 14u : 6u,
        .pDynamicStates = dynamic_states.data()
    };

    const vk::StencilOpState stencil_op_state = {
        .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
        .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
        .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
        .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)
    };

    const vk::PipelineDepthStencilStateCreateInfo depth_info = {
        .depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
        .depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
        .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
        .depthBoundsTestEnable = false,
        .stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
        .front = stencil_op_state,
        .back = stencil_op_state
    };

    const vk::GraphicsPipelineCreateInfo pipeline_info = {
        .stageCount = shader_count,
        .pStages = shader_stages.data(),
        .pVertexInputState = &vertex_input_info,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_info,
        .pRasterizationState = &raster_state,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = &depth_info,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_info,
        .layout = layout,
        .renderPass = renderpass_cache.GetRenderpass(info.color_attachment,
                                                     info.depth_attachment, false)
    };

    if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
            result.result == vk::Result::eSuccess) {
        return result.value;
    } else {
        LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
        UNREACHABLE();
    }

    return VK_NULL_HANDLE;
}
|
||||
|
||||
/// Re-allocates and rewrites any dirty descriptor sets, then binds all
/// rasterizer sets on the render command buffer.
void PipelineCache::BindDescriptorSets() {
    vk::Device device = instance.GetDevice();
    for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
        if (descriptor_dirty[i] || !descriptor_sets[i]) {
            // A fresh set is allocated on every change; the old set is not freed
            // individually. NOTE(review): this presumably relies on the scheduler
            // resetting its descriptor pool each frame — confirm, otherwise the
            // pool will eventually be exhausted.
            const vk::DescriptorSetAllocateInfo alloc_info = {
                .descriptorPool = scheduler.GetDescriptorPool(),
                .descriptorSetCount = 1,
                .pSetLayouts = &descriptor_set_layouts[i]
            };

            vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
            // Write all bindings at once from the shadow copy via the update template.
            device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i].data());

            descriptor_sets[i] = set;
            descriptor_dirty[i] = false;
        }
    }

    // Bind the descriptor sets
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT,
                                      descriptor_sets.data(), 0, nullptr);
}
|
||||
|
||||
void PipelineCache::LoadDiskCache() {
|
||||
const std::string cache_path =
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin";
|
||||
|
||||
FileUtil::IOFile cache_file{cache_path, "r"};
|
||||
if (!cache_file.IsOpen()) {
|
||||
LOG_INFO(Render_Vulkan, "No pipeline cache found");
|
||||
}
|
||||
|
||||
const u32 cache_file_size = cache_file.GetSize();
|
||||
auto cache_data = std::vector<u8>(cache_file_size);
|
||||
if (!cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
|
||||
LOG_WARNING(Render_Vulkan, "Error during pipeline cache read");
|
||||
return;
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
|
||||
const bool is_valid = ValidateData(cache_data.data(), cache_file_size);
|
||||
const vk::PipelineCacheCreateInfo cache_info = {
|
||||
.initialDataSize = is_valid ? cache_file_size : 0,
|
||||
.pInitialData = cache_data.data()
|
||||
};
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
pipeline_cache = device.createPipelineCache(cache_info);
|
||||
}
|
||||
|
||||
void PipelineCache::SaveDiskCache() {
|
||||
const std::string cache_path =
|
||||
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin";
|
||||
|
||||
FileUtil::IOFile cache_file{cache_path, "w"};
|
||||
if (!cache_file.IsOpen()) {
|
||||
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
auto cache_data = device.getPipelineCacheData(pipeline_cache);
|
||||
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
|
||||
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
|
||||
return;
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
}
|
||||
|
||||
bool PipelineCache::ValidateData(const u8* data, u32 size) {
|
||||
if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
|
||||
return false;
|
||||
}
|
||||
|
||||
vk::PipelineCacheHeaderVersionOne header;
|
||||
std::memcpy(&header, data, sizeof(header));
|
||||
if (header.headerSize < sizeof(header)) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
|
||||
header.vendorID, vendor_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
|
||||
header.deviceID, device_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
248
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
248
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
@ -0,0 +1,248 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/hash.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/shader/shader_cache.h"
|
||||
#include "video_core/regs.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr u32 MAX_SHADER_STAGES = 3;
|
||||
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr u32 MAX_VERTEX_BINDINGS = 16;
|
||||
constexpr u32 MAX_DESCRIPTORS = 8;
|
||||
constexpr u32 MAX_DESCRIPTOR_SETS = 6;
|
||||
|
||||
// Component type of a vertex attribute as encoded in VertexAttribute::type.
// NOTE(review): values presumably mirror the PICA vertex loader format field —
// confirm against the loader configuration before relying on the numeric values.
enum class AttribType : u32 {
    Float = 0,
    Int = 1,
    Short = 2,
    Byte = 3,
    Ubyte = 4
};
|
||||
|
||||
/**
 * The pipeline state is tightly packed with bitfields to reduce
 * the overhead of hashing as much as possible
 */
union RasterizationState {
    u8 value = 0;
    // Primitive topology (point/triangle/strip/fan) from the PICA pipeline regs
    BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
    // Face culling mode. NOTE(review): bits 2-3 are left unused — confirm the
    // gap is intentional before repacking.
    BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
|
||||
|
||||
// Depth/stencil test configuration. The union packs the static state into a
// single u32 for cheap hashing; the trailing byte members are applied as
// Vulkan dynamic state and are therefore kept outside the packed word.
struct DepthStencilState {
    union {
        u32 value = 0;
        BitField<0, 1, u32> depth_test_enable;
        BitField<1, 1, u32> depth_write_enable;
        BitField<2, 1, u32> stencil_test_enable;
        BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
        // Stencil op to run when the stencil test fails
        BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
        // Stencil op when both stencil and depth tests pass
        BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
        // Stencil op when the stencil test passes but the depth test fails
        BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
        BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
    };

    // These are dynamic state so keep them separate
    u8 stencil_reference;
    u8 stencil_compare_mask;
    u8 stencil_write_mask;
};
|
||||
|
||||
// Color blending configuration packed into one u32 for hashing.
// When logic_op_enable is set the blend fields are presumably ignored in favor
// of logic_op — confirm against the pipeline construction code.
union BlendingState {
    u32 value = 0;
    BitField<0, 1, u32> blend_enable;
    BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
    BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
    BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
    BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
    BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
    BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
    // One bit per RGBA channel
    BitField<23, 4, u32> color_write_mask;
    BitField<27, 1, u32> logic_op_enable;
    BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op;
};
|
||||
|
||||
// Describes one vertex buffer binding slot, packed into 16 bits.
union VertexBinding {
    u16 value = 0;
    // Binding slot index
    BitField<0, 4, u16> binding;
    // Set when the binding carries a fixed (non per-vertex) attribute
    BitField<4, 1, u16> fixed;
    // Stride in bytes between consecutive vertices (max 2047)
    BitField<5, 11, u16> stride;
};
|
||||
|
||||
// Describes one vertex attribute, packed into 32 bits.
union VertexAttribute {
    u32 value = 0;
    // Vertex buffer binding this attribute sources from
    BitField<0, 4, u32> binding;
    // Shader input location
    BitField<4, 4, u32> location;
    // Component type (see AttribType)
    BitField<8, 3, AttribType> type;
    // Number of components (1-4)
    BitField<11, 3, u32> size;
    // Byte offset from the start of the vertex (max 2047)
    BitField<14, 11, u32> offset;
};
|
||||
|
||||
// Complete vertex input layout: the active bindings and attributes.
// Only the first binding_count/attribute_count entries are meaningful.
struct VertexLayout {
    u8 binding_count;
    u8 attribute_count;
    std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
    std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
|
||||
|
||||
/**
 * Information about a graphics/compute pipeline.
 * Gathers all packed state that uniquely identifies a pipeline so it can be
 * hashed and used as a lookup key into the pipeline map.
 */
struct PipelineInfo {
    VertexLayout vertex_layout{};
    BlendingState blending{};
    // Formats select which pre-created renderpass the pipeline is built against
    VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8;
    VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
    RasterizationState rasterization{};
    DepthStencilState depth_stencil{};
};
|
||||
|
||||
// Payload for one descriptor binding; which member is active depends on the
// descriptor type of the binding it is written to.
union DescriptorData {
    vk::DescriptorImageInfo image_info;
    vk::DescriptorBufferInfo buffer_info;
    vk::BufferView buffer_view;

    // NOTE(review): memcmp compares the raw bytes of the whole union, including
    // bytes of the inactive member and any padding — this is only reliable if
    // every DescriptorData is zero-initialized before use. Confirm callers do so.
    bool operator!=(const DescriptorData& other) const {
        return std::memcmp(this, &other, sizeof(DescriptorData)) != 0;
    }
};
|
||||
|
||||
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;
|
||||
|
||||
/**
|
||||
* Vulkan specialized PICA shader caches
|
||||
*/
|
||||
using ProgrammableVertexShaders =
|
||||
Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, &Compile, &GenerateVertexShader>;
|
||||
|
||||
using FixedGeometryShaders =
|
||||
Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule, &Compile, &GenerateFixedGeometryShader>;
|
||||
|
||||
using FragmentShaders =
|
||||
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
|
||||
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
class RenderpassCache;
|
||||
|
||||
/**
 * Stores a collection of rasterizer pipelines used during rendering.
 * In addition handles descriptor set management.
 */
class PipelineCache {
public:
    PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache);
    ~PipelineCache();

    /// Binds a pipeline using the provided information
    void BindPipeline(const PipelineInfo& info);

    /// Binds a PICA decompiled vertex shader
    bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup);

    /// Binds a passthrough vertex shader
    void UseTrivialVertexShader();

    /// Binds a PICA decompiled geometry shader
    void UseFixedGeometryShader(const Pica::Regs& regs);

    /// Binds a passthrough geometry shader
    void UseTrivialGeometryShader();

    /// Binds a fragment shader generated from PICA state
    void UseFragmentShader(const Pica::Regs& regs);

    /// Binds a texture to the specified descriptor
    void BindTexture(u32 set, u32 binding, vk::ImageView view);

    /// Binds a buffer to the specified descriptor
    void BindBuffer(u32 set, u32 binding, vk::Buffer buffer, u32 offset, u32 size);

    /// Binds a texel buffer view to the specified descriptor
    void BindTexelBuffer(u32 set, u32 binding, vk::BufferView buffer_view);

    /// Binds a sampler to the specified descriptor
    void BindSampler(u32 set, u32 binding, vk::Sampler sampler);

    /// Sets the viewport rectangle to the provided values
    void SetViewport(float x, float y, float width, float height);

    /// Sets the scissor rectangle to the provided values
    void SetScissor(s32 x, s32 y, u32 width, u32 height);

    /// Marks all descriptor sets as dirty
    void MarkDescriptorSetsDirty();

private:
    /// Binds a resource to the provided binding
    void SetBinding(u32 set, u32 binding, DescriptorData data);

    /// Applies dynamic pipeline state to the current command buffer
    void ApplyDynamic(const PipelineInfo& info);

    /// Builds the rasterizer pipeline layout
    void BuildLayout();

    /// Builds a rasterizer pipeline using the PipelineInfo struct
    vk::Pipeline BuildPipeline(const PipelineInfo& info);

    /// Builds descriptor sets that reference the currently bound resources
    void BindDescriptorSets();

    /// Loads the pipeline cache stored to disk
    void LoadDiskCache();

    /// Stores the generated pipeline cache to disk
    void SaveDiskCache();

    /// Ensures the disk data was generated from the same driver
    bool ValidateData(const u8* data, u32 size);

private:
    const Instance& instance;
    TaskScheduler& scheduler;
    RenderpassCache& renderpass_cache;

    // Cached pipelines, keyed by the hash of their PipelineInfo
    vk::PipelineCache pipeline_cache;
    std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
    vk::Pipeline current_pipeline{};

    // Cached layouts for the rasterizer pipelines
    vk::PipelineLayout layout;
    std::array<vk::DescriptorSetLayout, MAX_DESCRIPTOR_SETS> descriptor_set_layouts;
    std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;

    // Current data for the descriptor sets; descriptor_dirty marks sets whose
    // resources changed and need re-allocation before the next draw
    std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
    std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
    std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;

    // Bound shader modules, indexed by ProgramType.
    // NOTE(review): GS = 2 and FS = 1 are out of declaration order — confirm
    // this matches the stage indexing used when filling current_shaders.
    enum ProgramType : u32 {
        VS = 0,
        GS = 2,
        FS = 1
    };

    std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
    std::array<u64, MAX_SHADER_STAGES> shader_hashes;
    ProgrammableVertexShaders programmable_vertex_shaders;
    FixedGeometryShaders fixed_geometry_shaders;
    FragmentShaders fragment_shaders;
    vk::ShaderModule trivial_vertex_shader;
};
|
||||
|
||||
} // namespace Vulkan
|
130
src/video_core/renderer_vulkan/vk_platform.h
Normal file
130
src/video_core/renderer_vulkan/vk_platform.h
Normal file
@ -0,0 +1,130 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Include the vulkan platform specific header
|
||||
#if defined(ANDROID) || defined (__ANDROID__)
|
||||
#define VK_USE_PLATFORM_ANDROID_KHR 1
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR 1
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK 1
|
||||
#define VK_USE_PLATFORM_METAL_EXT 1
|
||||
#else
|
||||
#ifdef WAYLAND_DISPLAY
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR 1
|
||||
#else // wayland
|
||||
#define VK_USE_PLATFORM_XLIB_KHR 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <vector>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Creates a platform-specific vk::SurfaceKHR for the emulator window.
/// Returns a null surface (after logging) when no branch matched or creation
/// failed — callers must check the result before using it.
inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) {
    const auto& window_info = emu_window.GetWindowInfo();
    vk::SurfaceKHR surface;

#if VK_USE_PLATFORM_WIN32_KHR
    if (window_info.type == Frontend::WindowSystemType::Windows) {
        const vk::Win32SurfaceCreateInfoKHR win32_ci = {
            // NOTE(review): the Vulkan spec requires a valid HINSTANCE here;
            // nullptr appears to work on common drivers — confirm and consider
            // GetModuleHandle(nullptr).
            .hinstance = nullptr,
            .hwnd = static_cast<HWND>(window_info.render_surface)
        };

        if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
            // NOTE(review): unlike the Xlib/Wayland paths below, this branch
            // does not call UNREACHABLE() — confirm the inconsistency is intended.
            LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
        }
    }
#elif VK_USE_PLATFORM_XLIB_KHR
    if (window_info.type == Frontend::WindowSystemType::X11) {
        const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
            static_cast<Display*>(window_info.display_connection),
            reinterpret_cast<Window>(window_info.render_surface)};
        if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
            UNREACHABLE();
        }
    }

#elif VK_USE_PLATFORM_WAYLAND_KHR
    if (window_info.type == Frontend::WindowSystemType::Wayland) {
        const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
            static_cast<wl_display*>(window_info.display_connection),
            static_cast<wl_surface*>(window_info.render_surface)};
        if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
            UNREACHABLE();
        }
    }
#endif

    // No platform branch produced a surface (e.g. macOS/Android have no case yet)
    if (!surface) {
        LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
    }

    return surface;
}
|
||||
|
||||
/// Builds the list of required instance extensions for the given window system,
/// verifying each against the extensions the driver actually exposes.
/// Returns an empty vector on any failure so callers can abort initialization.
inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
    const auto properties = vk::enumerateInstanceExtensionProperties();
    if (properties.empty()) {
        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
        return std::vector<const char*>{};
    }

    // Add the windowing system specific extension.
    // NOTE(review): only one platform case exists per build because of the
    // #elif chain; there is no case for Metal (macOS) or Android even though
    // vk_platform.h defines those macros — confirm whether that is a TODO.
    std::vector<const char*> extensions;
    extensions.reserve(6);

    switch (window_type) {
    case Frontend::WindowSystemType::Headless:
        break;
#if VK_USE_PLATFORM_WIN32_KHR
    case Frontend::WindowSystemType::Windows:
        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
        break;
#elif VK_USE_PLATFORM_XLIB_KHR
    case Frontend::WindowSystemType::X11:
        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
        break;
#elif VK_USE_PLATFORM_WAYLAND_KHR
    case Frontend::WindowSystemType::Wayland:
        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
        break;
#endif
    default:
        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
        break;
    }

    // The base surface extension is required whenever we present at all
    if (window_type != Frontend::WindowSystemType::Headless) {
        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
    }

    if (enable_debug_utils) {
        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
    }

    // Fail hard if any requested extension is missing from the driver
    for (const char* extension : extensions) {
        const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
            return std::strcmp(extension, prop.extensionName) == 0;
        });

        if (iter == properties.end()) {
            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
            return std::vector<const char*>{};
        }
    }

    return extensions;
}
|
||||
|
||||
} // namespace Vulkan
|
173
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
173
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Maps a color attachment index (1-5) to its canonical Vulkan format;
// any other index yields eUndefined (index 0 means "no color attachment").
vk::Format ToVkFormatColor(u32 index) {
    if (index == 1) {
        return vk::Format::eR8G8B8A8Unorm;
    }
    if (index == 2) {
        return vk::Format::eR8G8B8Unorm;
    }
    if (index == 3) {
        return vk::Format::eR5G5B5A1UnormPack16;
    }
    if (index == 4) {
        return vk::Format::eR5G6B5UnormPack16;
    }
    if (index == 5) {
        return vk::Format::eR4G4B4A4UnormPack16;
    }

    return vk::Format::eUndefined;
}
|
||||
|
||||
// Maps a depth attachment index (1-3) to its canonical Vulkan format;
// any other index yields eUndefined (index 0 means "no depth attachment").
vk::Format ToVkFormatDepth(u32 index) {
    if (index == 1) {
        return vk::Format::eD16Unorm;
    }
    if (index == 2) {
        return vk::Format::eX8D24UnormPack32;
    }
    if (index == 3) {
        return vk::Format::eD24UnormS8Uint;
    }

    return vk::Format::eUndefined;
}
|
||||
|
||||
RenderpassCache::RenderpassCache(const Instance& instance) : instance{instance} {
    // Pre-create every renderpass the renderer can need: one load variant and
    // one clear variant for each color/depth format combination.
    for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
        for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
            // Index 0 means "no attachment"; a pass with neither is useless
            if (color == 0 && depth == 0) {
                continue;
            }

            vk::Format color_format = vk::Format::eUndefined;
            if (color != 0) {
                color_format = instance.GetFormatAlternative(ToVkFormatColor(color));
            }

            vk::Format depth_stencil_format = vk::Format::eUndefined;
            if (depth != 0) {
                depth_stencil_format = instance.GetFormatAlternative(ToVkFormatDepth(depth));
            }

            // Slot 0 preserves previous contents, slot 1 clears on load
            cached_renderpasses[color][depth][0] =
                CreateRenderPass(color_format, depth_stencil_format,
                                 vk::AttachmentLoadOp::eLoad,
                                 vk::ImageLayout::eColorAttachmentOptimal,
                                 vk::ImageLayout::eColorAttachmentOptimal);
            cached_renderpasses[color][depth][1] =
                CreateRenderPass(color_format, depth_stencil_format,
                                 vk::AttachmentLoadOp::eClear,
                                 vk::ImageLayout::eColorAttachmentOptimal,
                                 vk::ImageLayout::eColorAttachmentOptimal);
        }
    }
}
|
||||
|
||||
RenderpassCache::~RenderpassCache() {
    vk::Device device = instance.GetDevice();

    // Destroy every load/clear renderpass pair created by the constructor
    for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
        for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
            if (color == 0 && depth == 0) {
                continue;
            }

            device.destroyRenderPass(cached_renderpasses[color][depth][0]);
            device.destroyRenderPass(cached_renderpasses[color][depth][1]);
        }
    }

    // The present renderpass is created lazily; destroying a null handle is a no-op
    device.destroyRenderPass(present_renderpass);
}
|
||||
|
||||
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
    // Lazily build the swapchain renderpass on first call; later calls no-op
    if (present_renderpass) {
        return;
    }

    present_renderpass =
        CreateRenderPass(format, vk::Format::eUndefined, vk::AttachmentLoadOp::eClear,
                         vk::ImageLayout::eUndefined, vk::ImageLayout::ePresentSrcKHR);
}
|
||||
|
||||
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
                                              bool is_clear) const {
    // Index 0 is reserved for "no attachment". Depth formats are rebased by 13
    // to land in [1, MAX_DEPTH_FORMATS].
    // NOTE(review): the -13 offset assumes the depth entries' position in the
    // VideoCore::PixelFormat enum — confirm against pixel_format.h.
    u32 color_index = 0;
    if (color != VideoCore::PixelFormat::Invalid) {
        color_index = static_cast<u32>(color);
    }

    u32 depth_index = 0;
    if (depth != VideoCore::PixelFormat::Invalid) {
        depth_index = static_cast<u32>(depth) - 13;
    }

    ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS);
    return cached_renderpasses[color_index][depth_index][is_clear];
}
|
||||
|
||||
vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
                                                 vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const {
    // Builds a single-subpass renderpass with up to two attachments: an
    // optional color attachment (using the caller-provided layouts) and an
    // optional depth-stencil attachment (always in depth-stencil-optimal).
    u32 attachment_count = 0;
    std::array<vk::AttachmentDescription, 2> attachments;

    bool use_color = false;
    vk::AttachmentReference color_attachment_ref{};
    bool use_depth = false;
    vk::AttachmentReference depth_attachment_ref{};

    if (color != vk::Format::eUndefined) {
        attachments[attachment_count] = vk::AttachmentDescription{
            .format = color,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
            .stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
            .initialLayout = initial_layout,
            .finalLayout = final_layout
        };

        color_attachment_ref = vk::AttachmentReference{
            .attachment = attachment_count++,
            .layout = vk::ImageLayout::eColorAttachmentOptimal
        };

        use_color = true;
    }

    if (depth != vk::Format::eUndefined) {
        attachments[attachment_count] = vk::AttachmentDescription{
            .format = depth,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eLoad,
            .stencilStoreOp = vk::AttachmentStoreOp::eStore,
            .initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal,
            .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
        };

        depth_attachment_ref = vk::AttachmentReference{
            .attachment = attachment_count++,
            .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
        };

        use_depth = true;
    }

    // We also require only one subpass.
    // BUGFIX: pResolveAttachments previously used the integer literal 0 as a
    // pointer; pColorAttachments also pointed at the reference even when no
    // color attachment existed — both now use nullptr in those cases.
    const vk::SubpassDescription subpass = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = use_color ? 1u : 0u,
        .pColorAttachments = use_color ? &color_attachment_ref : nullptr,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr
    };

    const vk::RenderPassCreateInfo renderpass_info = {
        .attachmentCount = attachment_count,
        .pAttachments = attachments.data(),
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 0,
        .pDependencies = nullptr
    };

    // Create the renderpass
    vk::Device device = instance.GetDevice();
    return device.createRenderPass(renderpass_info);
}
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
46
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
46
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
@ -0,0 +1,46 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Swapchain;
|
||||
|
||||
constexpr u32 MAX_COLOR_FORMATS = 5;
|
||||
constexpr u32 MAX_DEPTH_FORMATS = 3;
|
||||
|
||||
/**
 * Caches all renderpasses needed by the renderer: a load variant and a clear
 * variant for every color/depth format pair, plus the lazily-created
 * swapchain present renderpass.
 */
class RenderpassCache {
public:
    RenderpassCache(const Instance& instance);
    ~RenderpassCache();

    /// Creates the renderpass used when rendering to the swapchain
    void CreatePresentRenderpass(vk::Format format);

    /// Returns the renderpass associated with the color-depth format pair
    vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
                                 bool is_clear) const;

    /// Returns the swapchain clear renderpass
    vk::RenderPass GetPresentRenderpass() const {
        return present_renderpass;
    }

private:
    /// Creates a renderpass configured appropriately and stores it in cached_renderpasses
    vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
                                    vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const;

private:
    const Instance& instance;
    vk::RenderPass present_renderpass{};
    // Index 0 in either dimension means "no attachment"; the last index
    // selects the load (0) or clear (1) variant
    vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2];
};
|
||||
|
||||
} // namespace VideoCore::Vulkan
|
223
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
223
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
@ -0,0 +1,223 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/SPIRV/GlslangToSpv.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Resource limits passed to glslang's parser. glslang's public API does not
// ship a default-initialized TBuiltInResource, so these values are spelled out
// here; they appear to mirror the reference limits from glslang's StandAlone
// example — confirm when updating the glslang submodule.
constexpr TBuiltInResource DefaultTBuiltInResource = {
    .maxLights = 32,
    .maxClipPlanes = 6,
    .maxTextureUnits = 32,
    .maxTextureCoords = 32,
    .maxVertexAttribs = 64,
    .maxVertexUniformComponents = 4096,
    .maxVaryingFloats = 64,
    .maxVertexTextureImageUnits = 32,
    .maxCombinedTextureImageUnits = 80,
    .maxTextureImageUnits = 32,
    .maxFragmentUniformComponents = 4096,
    .maxDrawBuffers = 32,
    .maxVertexUniformVectors = 128,
    .maxVaryingVectors = 8,
    .maxFragmentUniformVectors = 16,
    .maxVertexOutputVectors = 16,
    .maxFragmentInputVectors = 15,
    .minProgramTexelOffset = -8,
    .maxProgramTexelOffset = 7,
    .maxClipDistances = 8,
    .maxComputeWorkGroupCountX = 65535,
    .maxComputeWorkGroupCountY = 65535,
    .maxComputeWorkGroupCountZ = 65535,
    .maxComputeWorkGroupSizeX = 1024,
    .maxComputeWorkGroupSizeY = 1024,
    .maxComputeWorkGroupSizeZ = 64,
    .maxComputeUniformComponents = 1024,
    .maxComputeTextureImageUnits = 16,
    .maxComputeImageUniforms = 8,
    .maxComputeAtomicCounters = 8,
    .maxComputeAtomicCounterBuffers = 1,
    .maxVaryingComponents = 60,
    .maxVertexOutputComponents = 64,
    .maxGeometryInputComponents = 64,
    .maxGeometryOutputComponents = 128,
    .maxFragmentInputComponents = 128,
    .maxImageUnits = 8,
    .maxCombinedImageUnitsAndFragmentOutputs = 8,
    .maxCombinedShaderOutputResources = 8,
    .maxImageSamples = 0,
    .maxVertexImageUniforms = 0,
    .maxTessControlImageUniforms = 0,
    .maxTessEvaluationImageUniforms = 0,
    .maxGeometryImageUniforms = 0,
    .maxFragmentImageUniforms = 8,
    .maxCombinedImageUniforms = 8,
    .maxGeometryTextureImageUnits = 16,
    .maxGeometryOutputVertices = 256,
    .maxGeometryTotalOutputComponents = 1024,
    .maxGeometryUniformComponents = 1024,
    .maxGeometryVaryingComponents = 64,
    .maxTessControlInputComponents = 128,
    .maxTessControlOutputComponents = 128,
    .maxTessControlTextureImageUnits = 16,
    .maxTessControlUniformComponents = 1024,
    .maxTessControlTotalOutputComponents = 4096,
    .maxTessEvaluationInputComponents = 128,
    .maxTessEvaluationOutputComponents = 128,
    .maxTessEvaluationTextureImageUnits = 16,
    .maxTessEvaluationUniformComponents = 1024,
    .maxTessPatchComponents = 120,
    .maxPatchVertices = 32,
    .maxTessGenLevel = 64,
    .maxViewports = 16,
    .maxVertexAtomicCounters = 0,
    .maxTessControlAtomicCounters = 0,
    .maxTessEvaluationAtomicCounters = 0,
    .maxGeometryAtomicCounters = 0,
    .maxFragmentAtomicCounters = 8,
    .maxCombinedAtomicCounters = 8,
    .maxAtomicCounterBindings = 1,
    .maxVertexAtomicCounterBuffers = 0,
    .maxTessControlAtomicCounterBuffers = 0,
    .maxTessEvaluationAtomicCounterBuffers = 0,
    .maxGeometryAtomicCounterBuffers = 0,
    .maxFragmentAtomicCounterBuffers = 1,
    .maxCombinedAtomicCounterBuffers = 1,
    .maxAtomicCounterBufferSize = 16384,
    .maxTransformFeedbackBuffers = 4,
    .maxTransformFeedbackInterleavedComponents = 64,
    .maxCullDistances = 8,
    .maxCombinedClipAndCullDistances = 8,
    .maxSamples = 4,
    .maxMeshOutputVerticesNV = 256,
    .maxMeshOutputPrimitivesNV = 512,
    .maxMeshWorkGroupSizeX_NV = 32,
    .maxMeshWorkGroupSizeY_NV = 1,
    .maxMeshWorkGroupSizeZ_NV = 1,
    .maxTaskWorkGroupSizeX_NV = 32,
    .maxTaskWorkGroupSizeY_NV = 1,
    .maxTaskWorkGroupSizeZ_NV = 1,
    .maxMeshViewCountNV = 4,
    .maxDualSourceDrawBuffersEXT = 1,
    .limits = TLimits{
        .nonInductiveForLoops = 1,
        .whileLoops = 1,
        .doWhileLoops = 1,
        .generalUniformIndexing = 1,
        .generalAttributeMatrixVectorIndexing = 1,
        .generalVaryingIndexing = 1,
        .generalSamplerIndexing = 1,
        .generalVariableIndexing = 1,
        .generalConstantMatrixVectorIndexing = 1,
    }
};
|
||||
|
||||
/// Translates a Vulkan shader stage flag to the matching glslang stage enum.
/// Unsupported stages log and hit UNREACHABLE(); the trailing return is a
/// fallback for builds where UNREACHABLE() is not noreturn.
EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
    switch (stage) {
    case vk::ShaderStageFlagBits::eVertex:
        return EShLanguage::EShLangVertex;
    case vk::ShaderStageFlagBits::eGeometry:
        return EShLanguage::EShLangGeometry;
    case vk::ShaderStageFlagBits::eFragment:
        return EShLanguage::EShLangFragment;
    case vk::ShaderStageFlagBits::eCompute:
        return EShLanguage::EShLangCompute;
    default:
        // BUGFIX: log message previously read "Unkown shader stage"
        LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
        UNREACHABLE();
    }

    return EShLanguage::EShLangVertex;
}
|
||||
|
||||
bool InitializeCompiler() {
|
||||
static bool glslang_initialized = false;
|
||||
|
||||
if (glslang_initialized) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!glslang::InitializeProcess()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::atexit([]() { glslang::FinalizeProcess(); });
|
||||
|
||||
glslang_initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Compiles GLSL source to SPIR-V with glslang and wraps it in a Vulkan shader module.
/// @param code GLSL source text (must stay alive for the duration of the call)
/// @param stage Pipeline stage the shader targets
/// @param device Device used to create the resulting module
/// @param level Debug disables the optimizer and embeds source; High strips debug info
/// @return The created module, or VK_NULL_HANDLE on any compile/link failure
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
                         ShaderOptimization level) {
    if (!InitializeCompiler()) {
        return VK_NULL_HANDLE;
    }

    EProfile profile = ECoreProfile;
    // SPIR-V + Vulkan semantic rules on top of the default messages.
    EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
    EShLanguage lang = ToEshShaderStage(stage);

    int default_version = 450;
    const char* pass_source_code = code.data();
    int pass_source_code_length = static_cast<int>(code.size());

    auto shader = std::make_unique<glslang::TShader>(lang);
    // Target SPIR-V 1.3 (Vulkan 1.1 baseline).
    shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
    shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);

    // No #include support is needed for generated shaders.
    glslang::TShader::ForbidIncluder includer;
    if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
        LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
        return VK_NULL_HANDLE;
    }

    // Even though there's only a single shader, we still need to link it to generate SPV
    auto program = std::make_unique<glslang::TProgram>();
    program->addShader(shader.get());
    if (!program->link(messages)) {
        LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
        return VK_NULL_HANDLE;
    }

    glslang::TIntermediate* intermediate = program->getIntermediate(lang);
    std::vector<u32> out_code;
    spv::SpvBuildLogger logger;
    glslang::SpvOptions options;

    // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
    if (level == ShaderOptimization::Debug) {
        intermediate->addSourceText(pass_source_code, pass_source_code_length);
        options.generateDebugInfo = true;
        options.disableOptimizer = true;
        options.optimizeSize = false;
        options.disassemble = false;
        options.validate = true;
    } else {
        options.disableOptimizer = false;
        options.stripDebugInfo = true;
    }

    glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);

    // Conversion messages are informational only; they do not imply failure.
    const std::string spv_messages = logger.getAllMessages();
    if (!spv_messages.empty()) {
        LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
    }

    const vk::ShaderModuleCreateInfo shader_info = {
        .codeSize = out_code.size() * sizeof(u32),
        .pCode = out_code.data()
    };

    // NOTE(review): createShaderModule throws on failure under default
    // Vulkan-Hpp settings — confirm the project's exception policy.
    return device.createShaderModule(shader_info);
}
|
||||
|
||||
} // namespace Vulkan
|
19
src/video_core/renderer_vulkan/vk_shader.h
Normal file
19
src/video_core/renderer_vulkan/vk_shader.h
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {

/// Selects how aggressively the GLSL -> SPIR-V compiler optimizes.
enum class ShaderOptimization {
    High = 0,  // Optimized output, debug info stripped
    Debug = 1  // No optimization, source + debug info embedded for RenderDoc
};

/// Compiles GLSL source for the given stage into a Vulkan shader module.
/// Returns VK_NULL_HANDLE on failure.
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage,
                         vk::Device device, ShaderOptimization level);

} // namespace Vulkan
|
1753
src/video_core/renderer_vulkan/vk_shader_gen.cpp
Normal file
1753
src/video_core/renderer_vulkan/vk_shader_gen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
247
src/video_core/renderer_vulkan/vk_shader_gen.h
Normal file
247
src/video_core/renderer_vulkan/vk_shader_gen.h
Normal file
@ -0,0 +1,247 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Vertex attribute slot indices shared between the generated shaders
/// and the vertex input layout.
enum Attributes {
    ATTRIBUTE_POSITION,
    ATTRIBUTE_COLOR,
    ATTRIBUTE_TEXCOORD0,
    ATTRIBUTE_TEXCOORD1,
    ATTRIBUTE_TEXCOORD2,
    ATTRIBUTE_TEXCOORD0_W,
    ATTRIBUTE_NORMQUAT,
    ATTRIBUTE_VIEW,
};
|
||||
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
// NOTE: this struct is part of a hashed cache key — field order and size matter.
struct TevStageConfigRaw {
    u32 sources_raw;
    u32 modifiers_raw;
    u32 ops_raw;
    u32 scales_raw;
    /// Expands the raw words back into a full TevStageConfig (const_color zeroed,
    /// since it is intentionally excluded from the cache key).
    explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
        Pica::TexturingRegs::TevStageConfig stage;
        stage.sources_raw = sources_raw;
        stage.modifiers_raw = modifiers_raw;
        stage.ops_raw = ops_raw;
        stage.const_color = 0;
        stage.scales_raw = scales_raw;
        return stage;
    }
};
|
||||
|
||||
/// Raw state consumed by the fragment shader generator. Hashed as a cache key,
/// so the byte layout must stay stable — do not reorder members.
struct PicaFSConfigState {
    Pica::FramebufferRegs::CompareFunc alpha_test_func;
    Pica::RasterizerRegs::ScissorMode scissor_test_mode;
    Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
    bool texture2_use_coord1;
    std::array<TevStageConfigRaw, 6> tev_stages;
    // Bitfield: low 4 bits = color buffer updates, high 4 bits = alpha (see
    // TevStageUpdatesCombinerBuffer{Color,Alpha}).
    u8 combiner_buffer_input;

    Pica::RasterizerRegs::DepthBuffering depthmap_enable;
    Pica::TexturingRegs::FogMode fog_mode;
    bool fog_flip;
    bool alphablend_enable;
    Pica::FramebufferRegs::LogicOp logic_op;

    // Per-pass lighting configuration mirrored from the Pica lighting registers.
    struct {
        struct {
            unsigned num;
            bool directional;
            bool two_sided_diffuse;
            bool dist_atten_enable;
            bool spot_atten_enable;
            bool geometric_factor_0;
            bool geometric_factor_1;
            bool shadow_enable;
        } light[8];

        bool enable;
        unsigned src_num;
        Pica::LightingRegs::LightingBumpMode bump_mode;
        unsigned bump_selector;
        bool bump_renorm;
        bool clamp_highlights;

        Pica::LightingRegs::LightingConfig config;
        bool enable_primary_alpha;
        bool enable_secondary_alpha;

        bool enable_shadow;
        bool shadow_primary;
        bool shadow_secondary;
        bool shadow_invert;
        bool shadow_alpha;
        unsigned shadow_selector;

        // One entry per lighting lookup table used by the generated shader.
        struct {
            bool enable;
            bool abs_input;
            Pica::LightingRegs::LightingLutInput type;
            float scale;
        } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
    } lighting;

    // Procedural texture unit configuration.
    struct {
        bool enable;
        u32 coord;
        Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
        Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
        bool separate_alpha;
        bool noise_enable;
        Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
        u32 lut_width;
        u32 lut_offset0;
        u32 lut_offset1;
        u32 lut_offset2;
        u32 lut_offset3;
        u32 lod_min;
        u32 lod_max;
        Pica::TexturingRegs::ProcTexFilter lut_filter;
    } proctex;

    bool shadow_rendering;
    bool shadow_texture_orthographic;
};
|
||||
|
||||
/**
 * This struct contains all state used to generate the GLSL fragment shader that emulates the
 * current Pica register configuration. This struct is used as a cache key for generated GLSL shader
 * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by
 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
 * two separate shaders sharing the same key.
 */
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {

    /// Construct a PicaFSConfig with the given Pica register configuration.
    static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);

    /// True when the given TEV stage writes its color result into the combiner buffer.
    bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
        return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
    }

    /// True when the given TEV stage writes its alpha result into the combiner buffer.
    bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
        return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
    }
};
|
||||
|
||||
/**
 * This struct contains common information to identify a GL vertex/geometry shader generated from
 * PICA vertex/geometry shader.
 */
struct PicaShaderConfigCommon {
    /// Captures the hashable identity of a programmable shader from the registers/setup.
    void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);

    u64 program_hash;   // Hash of the shader program code
    u64 swizzle_hash;   // Hash of the swizzle data
    u32 main_offset;    // Entry point offset within the program
    bool sanitize_mul;  // Whether multiplications need PICA-accurate NaN handling

    u32 num_outputs;

    // output_map[output register index] -> output attribute index
    std::array<u32, 16> output_map;
};
|
||||
|
||||
/**
|
||||
* This struct contains information to identify a GL vertex shader generated from PICA vertex
|
||||
* shader.
|
||||
*/
|
||||
/// Cache key identifying a generated vertex shader (see PicaShaderConfigCommon).
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
    explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
        state.Init(regs, setup);
    }
    /// Wraps an already-captured common config (used when the state was built elsewhere).
    explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
        state = conf;
    }
};
|
||||
|
||||
/// Hashable state describing the fixed-function geometry shader emulation.
struct PicaGSConfigCommonRaw {
    void Init(const Pica::Regs& regs);

    u32 vs_output_attributes;
    u32 gs_output_attributes;

    struct SemanticMap {
        u32 attribute_index;
        u32 component_index;
    };

    // semantic_maps[semantic name] -> GS output attribute index + component index
    std::array<SemanticMap, 24> semantic_maps;
};
|
||||
|
||||
/**
 * This struct contains information to identify a GL geometry shader generated from PICA no-geometry
 * shader pipeline
 */
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
    explicit PicaFixedGSConfig(const Pica::Regs& regs) {
        state.Init(regs);
    }
};
|
||||
|
||||
/**
 * Generates the GLSL vertex shader program source code that accepts vertices from software shader
 * and directly passes them to the fragment shader.
 * @returns String of the shader source code
 */
std::string GenerateTrivialVertexShader();

/**
 * Generates the GLSL vertex shader program source code for the given VS program
 * @returns String of the shader source code; std::nullopt on failure
 */
std::optional<std::string> GenerateVertexShader(
    const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config);

/**
 * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
 * @returns String of the shader source code
 */
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config);

/**
 * Generates the GLSL fragment shader program source code for the current Pica state
 * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
 *               configuration (NOTE: Use state in this struct only, not the Pica registers!)
 * @returns String of the shader source code
 */
std::string GenerateFragmentShader(const PicaFSConfig& config);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
// std::hash specializations so the shader configs can key unordered containers;
// they all defer to Common::HashableStruct::Hash().
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
    std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<Vulkan::PicaVSConfig> {
    std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<Vulkan::PicaFixedGSConfig> {
    std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std
|
241
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
241
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
@ -0,0 +1,241 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Maps a buffer usage to the access mask and pipeline stages that consume it,
/// used to build the post-upload memory barrier in StreamBuffer::Commit.
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
    std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
    switch (usage) {
    case vk::BufferUsageFlagBits::eVertexBuffer:
        result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
                                vk::PipelineStageFlagBits::eVertexInput);
        break;
    case vk::BufferUsageFlagBits::eIndexBuffer:
        result = std::make_pair(vk::AccessFlagBits::eIndexRead,
                                vk::PipelineStageFlagBits::eVertexInput);
        // Fixed: this case previously fell through into the uniform-buffer case,
        // returning the wrong access/stage flags for index buffers.
        break;
    case vk::BufferUsageFlagBits::eUniformBuffer:
        result = std::make_pair(vk::AccessFlagBits::eUniformRead,
                                vk::PipelineStageFlagBits::eVertexShader |
                                vk::PipelineStageFlagBits::eGeometryShader |
                                vk::PipelineStageFlagBits::eFragmentShader);
        // Fixed: missing break also clobbered uniform-buffer flags with the
        // texel-buffer pair below.
        break;
    case vk::BufferUsageFlagBits::eUniformTexelBuffer:
        result = std::make_pair(vk::AccessFlagBits::eShaderRead,
                                vk::PipelineStageFlagBits::eFragmentShader);
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
    }

    return result;
}
|
||||
|
||||
/// Creates a persistently-mapped, host-visible staging buffer of the given size.
/// The mapping is requested at allocation time (VMA_ALLOCATION_CREATE_MAPPED_BIT),
/// so `mapped` is valid for the buffer's entire lifetime.
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
    : instance{instance} {
    const vk::BufferCreateInfo buffer_info = {
        .size = size,
        .usage = usage
    };

    const VmaAllocationCreateInfo alloc_create_info = {
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                 VMA_ALLOCATION_CREATE_MAPPED_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST
    };

    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
    VmaAllocationInfo alloc_info;
    VmaAllocator allocator = instance.GetAllocator();

    // Fixed: the allocation result was silently ignored; a failure would have
    // produced a null buffer and a null mapped pointer used blindly below.
    const VkResult result = vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
                                            &unsafe_buffer, &allocation, &alloc_info);
    if (result != VK_SUCCESS) {
        LOG_CRITICAL(Render_Vulkan, "Failed to allocate staging buffer: {}",
                     static_cast<int>(result));
        UNREACHABLE();
    }

    buffer = vk::Buffer{unsafe_buffer};
    mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
}
|
||||
|
||||
/// Releases the buffer and its VMA allocation; the persistent mapping is
/// unmapped automatically by vmaDestroyBuffer.
StagingBuffer::~StagingBuffer() {
    vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
|
||||
|
||||
/// Creates the device-local GPU buffer, its optional texel buffer views, and the
/// CPU-visible staging buffer that Map/Flush copy through.
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info)
    : instance{instance}, scheduler{scheduler}, info{info},
      staging{instance, info.size, vk::BufferUsageFlagBits::eTransferSrc} {

    // eTransferDst is required since all data arrives via staging copies.
    const vk::BufferCreateInfo buffer_info = {
        .size = info.size,
        .usage = info.usage | vk::BufferUsageFlagBits::eTransferDst
    };

    const VmaAllocationCreateInfo alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
    };

    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
    VmaAllocationInfo alloc_info;
    VmaAllocator allocator = instance.GetAllocator();

    vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
                    &unsafe_buffer, &allocation, &alloc_info);

    buffer = vk::Buffer{unsafe_buffer};

    vk::Device device = instance.GetDevice();

    // Fixed: view_count was only assigned when an eUndefined sentinel was found.
    // If every slot in info.views holds a real format, view_count stayed 0 and
    // the created views were unreachable via GetView and leaked on destruction.
    view_count = static_cast<u32>(info.views.size());
    for (u32 i = 0; i < info.views.size(); i++) {
        if (info.views[i] == vk::Format::eUndefined) {
            view_count = i;
            break;
        }

        const vk::BufferViewCreateInfo view_info = {
            .buffer = buffer,
            .format = info.views[i],
            .range = info.size
        };

        views[i] = device.createBufferView(view_info);
    }

    available_size = info.size;
}
|
||||
|
||||
/// Destroys the GPU buffer, its allocation and any created buffer views.
/// Guarded on `buffer` so a moved-from/failed instance is safe to destroy.
StreamBuffer::~StreamBuffer() {
    if (buffer) {
        vk::Device device = instance.GetDevice();
        vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
        for (u32 i = 0; i < view_count; i++) {
            device.destroyBufferView(views[i]);
        }
    }
}
|
||||
|
||||
/// Reserves `size` bytes of staging memory aligned to `alignment`.
/// @return {write pointer, offset within the buffer, whether the buffer was invalidated}
/// The caller must follow up with Commit() after writing.
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
    ASSERT(size <= info.size && alignment <= info.size);

    if (alignment > 0) {
        // NOTE(review): alignment slack is not subtracted from available_size,
        // so the bookkeeping can drift slightly optimistic — confirm intended.
        buffer_offset = Common::AlignUp(buffer_offset, alignment);
    }

    // Have we run out of available space?
    // NOTE(review): available_size is s32 compared against u32 `size`; if it ever
    // went negative this comparison would wrap — verify it cannot.
    bool invalidate = false;
    if (available_size < size) {
        // Flush any pending writes before continuing
        Flush();

        // If we are at the end of the buffer, start over
        if (buffer_offset + size > info.size) {
            Invalidate();
            invalidate = true;
        }

        // Try to garbage collect old regions
        if (!UnlockFreeRegions(size)) {
            // Nuclear option: stall the GPU to remove all the locks
            LOG_WARNING(Render_Vulkan, "Buffer GPU stall");
            Invalidate();
            regions.clear();
            available_size = info.size;
        }
    }

    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
    return std::make_tuple(mapped, buffer_offset, invalidate);
}
|
||||
|
||||
/// Commits `size` bytes written after the last Map() call: records a barrier so
/// the upcoming staging->GPU copy is visible to the stages that read this buffer,
/// then advances the write cursor.
void StreamBuffer::Commit(u32 size) {
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();

    // Translate the buffer usage into the consuming access/stage masks.
    auto [access_mask, stage_mask] = ToVkAccessStageFlags(info.usage);
    const vk::BufferMemoryBarrier buffer_barrier = {
        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
        .dstAccessMask = access_mask,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = buffer,
        .offset = buffer_offset,
        .size = size
    };

    command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
                                   vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});

    buffer_offset += size;
    available_size -= size;
}
|
||||
|
||||
void StreamBuffer::Flush() {
|
||||
const u32 flush_size = buffer_offset - flush_start;
|
||||
if (flush_size > 0) {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
const u32 flush_size = buffer_offset - flush_start;
|
||||
const vk::BufferCopy copy_region = {
|
||||
.srcOffset = flush_start,
|
||||
.dstOffset = flush_start,
|
||||
.size = flush_size
|
||||
};
|
||||
|
||||
vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
|
||||
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
|
||||
|
||||
// Lock the region
|
||||
const LockedRegion region = {
|
||||
.size = flush_size,
|
||||
.fence_counter = scheduler.GetFenceCounter()
|
||||
};
|
||||
|
||||
regions.emplace(flush_start, region);
|
||||
flush_start = buffer_offset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Resets the write cursor to the start of the buffer. Does not touch the
/// locked regions or available_size; callers handle those separately.
void StreamBuffer::Invalidate() {
    buffer_offset = 0;
    flush_start = 0;
}
|
||||
|
||||
/// Reclaims locked regions ahead of the write cursor until at least
/// `target_size` bytes are free, waiting on GPU fences if necessary.
/// @return true when enough space was reclaimed.
bool StreamBuffer::UnlockFreeRegions(u32 target_size) {
    available_size = 0;

    // Free regions that don't need waiting
    auto it = regions.lower_bound(buffer_offset);
    while (it != regions.end()) {
        const auto& [offset, region] = *it;
        if (region.fence_counter <= scheduler.GetFenceCounter()) {
            available_size += region.size;
            it = regions.erase(it);
        } else {
            break;
        }
    }

    // If that wasn't enough, try waiting for some fences.
    // Fixed: the loop dereferenced *it without checking for end(), which is
    // undefined behavior once all regions have been consumed.
    while (available_size < target_size && it != regions.end()) {
        const auto& [offset, region] = *it;

        if (region.fence_counter > scheduler.GetFenceCounter()) {
            scheduler.WaitFence(region.fence_counter);
        }

        available_size += region.size;
        it = regions.erase(it);
    }

    return available_size >= target_size;
}
|
||||
|
||||
} // namespace Vulkan
|
87
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
87
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
@ -0,0 +1,87 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
|
||||
constexpr u32 MAX_BUFFER_VIEWS = 3;
|
||||
|
||||
/// Creation parameters for a StreamBuffer. Unused `views` slots are left as
/// eUndefined, which acts as an end-of-list sentinel.
struct BufferInfo {
    u32 size = 0;                                     // Total buffer size in bytes
    vk::BufferUsageFlagBits usage{};                  // Primary usage (vertex/index/uniform/texel)
    std::array<vk::Format, MAX_BUFFER_VIEWS> views{}; // Formats for texel buffer views
};
|
||||
|
||||
/// A span of the buffer still in use by the GPU; it may be reused once the
/// scheduler's fence counter reaches `fence_counter`.
struct LockedRegion {
    u32 size = 0;
    u64 fence_counter = 0;
};
|
||||
|
||||
/// Host-visible, persistently-mapped buffer used as the CPU side of uploads.
struct StagingBuffer {
    StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
    ~StagingBuffer();

    const Instance& instance;       // Non-owning; must outlive the buffer
    vk::Buffer buffer{};
    VmaAllocation allocation{};
    std::span<std::byte> mapped{};  // Persistent CPU mapping of the whole buffer
};
|
||||
|
||||
/// Ring-buffer style GPU buffer fed through a staging buffer. Typical usage:
/// Map() -> write -> Commit() -> (periodically) Flush(); regions the GPU is
/// still reading are fence-locked and reclaimed in UnlockFreeRegions.
class StreamBuffer {
public:
    StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info);
    ~StreamBuffer();

    /// Reserves `size` bytes (aligned); returns write pointer, buffer offset and
    /// whether the buffer was invalidated (callers must rebind state if so).
    std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);

    /// Commits size bytes from the currently mapped staging memory
    void Commit(u32 size = 0);

    /// Flushes staging memory to the GPU buffer
    void Flush();

    /// Returns the Vulkan buffer handle
    vk::Buffer GetHandle() const {
        return buffer;
    }

    /// Returns an immutable reference to the requested buffer view
    const vk::BufferView& GetView(u32 index = 0) const {
        ASSERT(index < view_count);
        return views[index];
    }

private:
    /// Invalidates the buffer offsets
    void Invalidate();

    /// Removes the lock on regions whose fence counter has been reached by the GPU
    bool UnlockFreeRegions(u32 target_size);

private:
    const Instance& instance;   // Non-owning
    TaskScheduler& scheduler;   // Non-owning
    BufferInfo info{};
    StagingBuffer staging;      // CPU-visible mirror used for uploads

    vk::Buffer buffer{};        // Device-local destination buffer
    VmaAllocation allocation{};
    std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
    u32 view_count = 0;

    u32 buffer_offset = 0;      // Current write cursor
    u32 flush_start = 0;        // Start of the not-yet-flushed range
    s32 available_size = 0;     // Bytes free ahead of the cursor
    std::map<u32, LockedRegion> regions; // offset -> fence-locked region
};
|
||||
|
||||
} // namespace Vulkan
|
237
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
237
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
@ -0,0 +1,237 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <algorithm>
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Stores the surface and queries its properties so the present renderpass can
/// be created before the swapchain itself exists (Create() is called later).
Swapchain::Swapchain(const Instance& instance, CommandScheduler& scheduler,
                     RenderpassCache& renderpass_cache, vk::SurfaceKHR surface)
    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, surface{surface} {

    // Set the surface format early for RenderpassCache to create the present renderpass
    Configure(0, 0);
    renderpass_cache.CreatePresentRenderpass(surface_format.format);
}
|
||||
|
||||
/// Destroys per-image resources, the sync semaphores and the swapchain itself.
Swapchain::~Swapchain() {
    vk::Device device = instance.GetDevice();
    // Fixed: the image views/framebuffers created in Create() were only freed on
    // swapchain recreation, leaking them at shutdown.
    for (auto& image : swapchain_images) {
        device.destroyImageView(image.image_view);
        device.destroyFramebuffer(image.framebuffer);
    }
    device.destroySemaphore(render_finished);
    device.destroySemaphore(image_available);
    device.destroySwapchainKHR(swapchain);
}
|
||||
|
||||
/// Creates (or recreates) the swapchain along with per-image views and
/// framebuffers. Safe to call repeatedly; the previous swapchain is reused as
/// oldSwapchain and then destroyed.
/// NOTE(review): vsync_enabled is currently unused — present mode is chosen in
/// Configure(); confirm whether it should gate the Mailbox preference.
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
    is_outdated = false;
    is_suboptimal = false;

    // Fetch information about the provided surface
    Configure(width, height);

    const std::array queue_family_indices = {
        instance.GetGraphicsQueueFamilyIndex(),
        instance.GetPresentQueueFamilyIndex(),
    };

    // Concurrent sharing is only needed when graphics and present queues differ.
    const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
    const u32 queue_family_indices_count = exclusive ? 1u : 2u;
    const vk::SharingMode sharing_mode =
        exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
    const vk::SwapchainCreateInfoKHR swapchain_info = {
        .surface = surface,
        .minImageCount = image_count,
        .imageFormat = surface_format.format,
        .imageColorSpace = surface_format.colorSpace,
        .imageExtent = extent,
        .imageArrayLayers = 1,
        .imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
        .imageSharingMode = sharing_mode,
        .queueFamilyIndexCount = queue_family_indices_count,
        .pQueueFamilyIndices = queue_family_indices.data(),
        .preTransform = transform,
        .presentMode = present_mode,
        .clipped = true,
        .oldSwapchain = swapchain
    };

    vk::Device device = instance.GetDevice();
    vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);

    // If an old swapchain exists, destroy it and move the new one to its place.
    if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
        device.destroySwapchainKHR(old_swapchain);
    }

    // Create sync objects if not already created
    if (!image_available) {
        image_available = device.createSemaphore({});
    }

    if (!render_finished) {
        render_finished = device.createSemaphore({});
    }

    vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
    auto images = device.getSwapchainImagesKHR(swapchain);

    // Destroy the previous images
    for (auto& image : swapchain_images) {
        device.destroyImageView(image.image_view);
        device.destroyFramebuffer(image.framebuffer);
    }
    swapchain_images.clear();

    // Fixed: ranges::transform previously wrote through begin() of the freshly
    // cleared vector (undefined behavior). Size the destination first.
    swapchain_images.resize(images.size());

    std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image {
        const vk::ImageViewCreateInfo view_info = {
            .image = image,
            .viewType = vk::ImageViewType::e2D,
            .format = surface_format.format,
            .subresourceRange = {
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            }
        };

        vk::ImageView image_view = device.createImageView(view_info);
        const std::array attachments{image_view};

        const vk::FramebufferCreateInfo framebuffer_info = {
            .renderPass = present_renderpass,
            .attachmentCount = 1,
            .pAttachments = attachments.data(),
            .width = extent.width,
            .height = extent.height,
            .layers = 1
        };

        vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info);

        return Image{
            .image = image,
            .image_view = image_view,
            .framebuffer = framebuffer
        };
    });
}
|
||||
|
||||
// Wait for maximum of 1 second
|
||||
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
|
||||
|
||||
void Swapchain::AcquireNextImage() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE,
|
||||
¤t_image);
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
case vk::Result::eSuboptimalKHR:
|
||||
is_suboptimal = true;
|
||||
break;
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Present() {
|
||||
const vk::PresentInfoKHR present_info = {
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &render_finished,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = ¤t_image
|
||||
};
|
||||
|
||||
vk::Queue present_queue = instance.GetPresentQueue();
|
||||
vk::Result result = present_queue.presentKHR(present_info);
|
||||
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
case vk::Result::eSuboptimalKHR:
|
||||
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
|
||||
break;
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
|
||||
break;
|
||||
}
|
||||
|
||||
current_frame = (current_frame + 1) % swapchain_images.size();
|
||||
}
|
||||
|
||||
/// Queries the surface for format, present mode, extent, image count and
/// transform, storing the results in the corresponding members. Called both at
/// construction (with 0x0) and before each swapchain (re)creation.
void Swapchain::Configure(u32 width, u32 height) {
    vk::PhysicalDevice physical = instance.GetPhysicalDevice();

    // Choose surface format
    auto formats = physical.getSurfaceFormatsKHR(surface);
    surface_format = formats[0];

    if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
        // The surface imposes no restriction; pick BGRA8.
        surface_format.format = vk::Format::eB8G8R8A8Unorm;
    } else {
        auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool {
            return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
                   format.format == vk::Format::eB8G8R8A8Unorm;
        });

        if (it == formats.end()) {
            // Falls back to formats[0] set above.
            LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
        } else {
            surface_format = *it;
        }
    }

    // Checks if a particular mode is supported, if it is, returns that mode.
    auto modes = physical.getSurfacePresentModesKHR(surface);

    // FIFO is guaranteed by the Vulkan standard to be available
    present_mode = vk::PresentModeKHR::eFifo;
    auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) {
        return vk::PresentModeKHR::eMailbox == mode;
    });

    // Prefer Mailbox if present for lowest latency
    if (iter != modes.end()) {
        present_mode = vk::PresentModeKHR::eMailbox;
    }

    // Query surface extent
    auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
    extent = capabilities.currentExtent;

    // A currentExtent of u32 max means the surface size is defined by the swapchain.
    if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
        extent.width = std::clamp(width, capabilities.minImageExtent.width,
                                  capabilities.maxImageExtent.width);
        extent.height = std::clamp(height, capabilities.minImageExtent.height,
                                   capabilities.maxImageExtent.height);
    }

    // Select number of images in swap chain, we prefer one buffer in the background to work on
    image_count = capabilities.minImageCount + 1;
    if (capabilities.maxImageCount > 0) {
        // maxImageCount == 0 means "no limit"
        image_count = std::min(image_count, capabilities.maxImageCount);
    }

    // Prefer identity transform if possible
    transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
    if (!(capabilities.supportedTransforms & transform)) {
        transform = capabilities.currentTransform;
    }
}
|
||||
|
||||
} // namespace Vulkan
|
101
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
101
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
@ -0,0 +1,101 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class CommandScheduler;
|
||||
class RenderpassCache;
|
||||
|
||||
/// Manages the Vulkan swapchain and the per-frame synchronization primitives
/// needed to acquire and present its images.
class Swapchain {
public:
    Swapchain(const Instance& instance, CommandScheduler& scheduler,
              RenderpassCache& renderpass_cache, vk::SurfaceKHR surface);
    ~Swapchain();

    /// Creates (or recreates) the swapchain with a given size.
    void Create(u32 width, u32 height, bool vsync_enabled);

    /// Acquires the next image in the swapchain.
    void AcquireNextImage();

    /// Presents the current image and move to the next one
    void Present();

    /// Returns the current swapchain extent in pixels.
    vk::Extent2D GetExtent() const {
        return extent;
    }

    /// Returns the swapchain surface
    vk::SurfaceKHR GetSurface() const {
        return surface;
    }

    /// Returns the swapchain format
    vk::SurfaceFormatKHR GetSurfaceFormat() const {
        return surface_format;
    }

    /// Returns the Vulkan swapchain handle
    vk::SwapchainKHR GetHandle() const {
        return swapchain;
    }

    /// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
    vk::Semaphore GetAvailableSemaphore() const {
        return image_available;
    }

    /// Returns the semaphore that will signal when the current image will be presented
    vk::Semaphore GetPresentSemaphore() const {
        return render_finished;
    }

    /// Returns true when the swapchain should be recreated
    bool NeedsRecreation() const {
        return is_suboptimal || is_outdated;
    }

private:
    /// Queries surface format, present mode, extent, transform and image count
    /// for the given dimensions.
    void Configure(u32 width, u32 height);

private:
    const Instance& instance;
    CommandScheduler& scheduler;
    RenderpassCache& renderpass_cache;
    vk::SwapchainKHR swapchain{};
    vk::SurfaceKHR surface{};

    // Swapchain properties
    vk::SurfaceFormatKHR surface_format;
    vk::PresentModeKHR present_mode;
    vk::Extent2D extent;
    vk::SurfaceTransformFlagBitsKHR transform;
    u32 image_count;

    // Per-swapchain-image resources.
    struct Image {
        vk::Image image;
        vk::ImageView image_view;
        vk::Framebuffer framebuffer;
    };

    // Swapchain state
    std::vector<Image> swapchain_images;
    vk::Semaphore image_available{};
    vk::Semaphore render_finished{};
    u32 current_image = 0; // index returned by the last acquire
    u32 current_frame = 0;
    bool vsync_enabled = false;
    bool is_outdated = true;   // surface out of date; must recreate
    bool is_suboptimal = true; // usable, but recreation recommended
};
|
||||
|
||||
} // namespace Vulkan
|
178
src/video_core/renderer_vulkan/vk_task_scheduler.cpp
Normal file
178
src/video_core/renderer_vulkan/vk_task_scheduler.cpp
Normal file
@ -0,0 +1,178 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Creates the command pool, allocates two command buffers (render + upload)
// plus a fence and a descriptor pool for each of the SCHEDULER_COMMAND_COUNT
// execution slots, then begins recording on the first slot.
TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {

    vk::Device device = instance.GetDevice();
    // eResetCommandBuffer lets each command buffer be re-begun individually
    // without resetting the whole pool.
    const vk::CommandPoolCreateInfo command_pool_info = {
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
    };

    command_pool = device.createCommandPool(command_pool_info);

    // Per-slot descriptor pool budget; pools are bulk-reset on slot reuse.
    constexpr std::array pool_sizes = {
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024},
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024},
        vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
        vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048},
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024}
    };

    const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
        .maxSets = 2048,
        .poolSizeCount = static_cast<u32>(pool_sizes.size()),
        .pPoolSizes = pool_sizes.data()
    };

    // Two command buffers per slot: even index = render, odd index = upload.
    const vk::CommandBufferAllocateInfo buffer_info = {
        .commandPool = command_pool,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
    };

    const auto command_buffers = device.allocateCommandBuffers(buffer_info);
    for (std::size_t i = 0; i < commands.size(); i++) {
        commands[i] = ExecutionSlot{
            .fence = device.createFence({}),
            .descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
            .render_command_buffer = command_buffers[2 * i],
            .upload_command_buffer = command_buffers[2 * i + 1],
        };
    }

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };

    // Begin first command
    auto& command = commands[current_command];
    command.render_command_buffer.begin(begin_info);
    command.fence_counter = next_fence_counter++;
}
|
||||
|
||||
// Flushes any pending work and destroys all per-slot and pool resources.
TaskScheduler::~TaskScheduler() {
    // The active command buffer is still in the recording state; submit it and
    // block until the GPU is done before tearing anything down.
    Submit(true, false);

    vk::Device device = instance.GetDevice();
    for (const auto& slot : commands) {
        device.destroyFence(slot.fence);
        device.destroyDescriptorPool(slot.descriptor_pool);
    }

    // Destroying the pool also frees the command buffers allocated from it.
    device.destroyCommandPool(command_pool);
}
|
||||
|
||||
// Blocks the host until the given slot's last submission has completed, then
// resets the slot's fence and descriptor pool so it can be recorded again.
void TaskScheduler::Synchronize(u32 slot) {
    vk::Device device = instance.GetDevice();
    const auto& command = commands[slot];

    // Skip the wait when this slot's fence counter has already been observed
    // as complete.
    const bool still_pending = command.fence_counter > completed_fence_counter;
    if (still_pending) {
        const vk::Result wait_result = device.waitForFences(command.fence, true, UINT64_MAX);
        if (wait_result != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Waiting for fences failed!");
        }
        completed_fence_counter = command.fence_counter;
    }

    device.resetFences(command.fence);
    device.resetDescriptorPool(command.descriptor_pool);
}
|
||||
|
||||
// Blocks until the command slot whose fence counter equals `counter` has
// finished on the GPU.
// NOTE(review): `counter` is u32 while ExecutionSlot::fence_counter is u64,
// so the comparison truncates once counters exceed 32 bits — confirm the
// intended counter width (the header declares u32 as well, so fixing it
// requires touching both files).
void TaskScheduler::WaitFence(u32 counter) {
    for (u32 i = 0; i < SCHEDULER_COMMAND_COUNT; i++) {
        if (commands[i].fence_counter == counter) {
            return Synchronize(i);
        }
    }

    // A counter that maps to no slot indicates a caller bug.
    UNREACHABLE_MSG("Invalid fence counter!");
}
|
||||
|
||||
// Ends recording on the current slot's command buffer(s) and submits them to
// the graphics queue, signaling the slot's fence.
// wait_completion: block the host until this submission finishes.
// begin_next: rotate to the next slot and begin recording it.
// wait_semaphore/signal_semaphore: optional semaphores for presentation sync.
void TaskScheduler::Submit(bool wait_completion, bool begin_next,
                           vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
    const auto& command = commands[current_command];
    command.render_command_buffer.end();
    if (command.use_upload_buffer) {
        command.upload_command_buffer.end();
    }

    u32 command_buffer_count = 0;
    std::array<vk::CommandBuffer, 2> command_buffers;

    // The upload buffer is submitted first so its transfers complete before
    // the render commands that consume them.
    if (command.use_upload_buffer) {
        command_buffers[command_buffer_count++] = command.upload_command_buffer;
    }

    command_buffers[command_buffer_count++] = command.render_command_buffer;

    // When no semaphore handle was provided the corresponding count is zero
    // and Vulkan ignores the pointer.
    const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
    const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
    const vk::PipelineStageFlags wait_stage_masks =
        vk::PipelineStageFlagBits::eColorAttachmentOutput;
    const vk::SubmitInfo submit_info = {
        .waitSemaphoreCount = wait_semaphore_count,
        .pWaitSemaphores = &wait_semaphore,
        .pWaitDstStageMask = &wait_stage_masks,
        .commandBufferCount = command_buffer_count,
        .pCommandBuffers = command_buffers.data(),
        .signalSemaphoreCount = signal_semaphore_count,
        .pSignalSemaphores = &signal_semaphore,
    };

    vk::Queue queue = instance.GetGraphicsQueue();
    queue.submit(submit_info, command.fence);

    // Block host until the GPU catches up
    if (wait_completion) {
        Synchronize(current_command);
    }

    // Switch to next cmdbuffer.
    if (begin_next) {
        SwitchSlot();
    }
}
|
||||
|
||||
// Returns the current slot's upload command buffer, lazily beginning its
// recording on first use within the slot.
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
    auto& slot = commands[current_command];
    if (slot.use_upload_buffer) {
        return slot.upload_command_buffer;
    }

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };

    slot.upload_command_buffer.begin(begin_info);
    slot.use_upload_buffer = true;
    return slot.upload_command_buffer;
}
|
||||
|
||||
// Rotates to the next execution slot (round-robin), waits for the GPU to
// release its resources, and begins recording its render command buffer.
void TaskScheduler::SwitchSlot() {
    current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
    auto& command = commands[current_command];

    // Wait for the GPU to finish with all resources for this command.
    Synchronize(current_command);

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };

    // Begin the next command buffer.
    command.render_command_buffer.begin(begin_info);
    command.fence_counter = next_fence_counter++;
    command.use_upload_buffer = false;
}
|
||||
|
||||
} // namespace Vulkan
|
82
src/video_core/renderer_vulkan/vk_task_scheduler.h
Normal file
82
src/video_core/renderer_vulkan/vk_task_scheduler.h
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
|
||||
|
||||
class Buffer;
|
||||
class Instance;
|
||||
|
||||
/// Round-robin command submission scheduler: owns SCHEDULER_COMMAND_COUNT
/// execution slots, each with its own fence, descriptor pool and a pair of
/// command buffers (render + upload), tracked by monotonically increasing
/// fence counters.
class TaskScheduler {
public:
    TaskScheduler(const Instance& instance);
    ~TaskScheduler();

    /// Blocks the host until the current command completes execution
    void Synchronize(u32 slot);

    /// Waits for the fence counter to be reached by the GPU
    void WaitFence(u32 counter);

    /// Submits the current command to the graphics queue
    void Submit(bool wait_completion = false, bool begin_next = true,
                vk::Semaphore wait = VK_NULL_HANDLE,
                vk::Semaphore signal = VK_NULL_HANDLE);

    /// Returns the command buffer used for early upload operations.
    vk::CommandBuffer GetUploadCommandBuffer();

    /// Returns the command buffer used for rendering
    vk::CommandBuffer GetRenderCommandBuffer() const {
        return commands[current_command].render_command_buffer;
    }

    /// Returns the current descriptor pool
    vk::DescriptorPool GetDescriptorPool() const {
        return commands[current_command].descriptor_pool;
    }

    /// Returns the index of the current command slot
    u32 GetCurrentSlotIndex() const {
        return current_command;
    }

    /// Returns the last completed fence counter
    u64 GetFenceCounter() const {
        return completed_fence_counter;
    }

private:
    /// Activates the next command slot and optionally waits for its completion
    void SwitchSlot();

private:
    const Instance& instance;
    u64 next_fence_counter = 1;    // counter assigned to the next begun slot
    u64 completed_fence_counter = 0; // highest counter observed as finished

    // State owned by one in-flight submission.
    struct ExecutionSlot {
        bool use_upload_buffer = false; // upload buffer begun this slot
        u64 fence_counter = 0;
        vk::Fence fence{};
        vk::DescriptorPool descriptor_pool;
        vk::CommandBuffer render_command_buffer{};
        vk::CommandBuffer upload_command_buffer{};
    };

    vk::CommandPool command_pool{};
    std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands;
    u32 current_command = 0; // index of the slot currently being recorded
};
|
||||
|
||||
} // namespace Vulkan
|
562
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
562
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
@ -0,0 +1,562 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Translates a VideoCore pixel format to the matching Vulkan format.
/// Unlisted (texture-only) formats fall back to RGBA8; Invalid maps to
/// eUndefined with an error log.
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
    using VideoCore::PixelFormat;
    if (format == PixelFormat::RGBA8) {
        return vk::Format::eR8G8B8A8Unorm;
    } else if (format == PixelFormat::RGB8) {
        return vk::Format::eR8G8B8Unorm;
    } else if (format == PixelFormat::RGB5A1) {
        return vk::Format::eR5G5B5A1UnormPack16;
    } else if (format == PixelFormat::RGB565) {
        return vk::Format::eR5G6B5UnormPack16;
    } else if (format == PixelFormat::RGBA4) {
        return vk::Format::eR4G4B4A4UnormPack16;
    } else if (format == PixelFormat::D16) {
        return vk::Format::eD16Unorm;
    } else if (format == PixelFormat::D24) {
        return vk::Format::eX8D24UnormPack32;
    } else if (format == PixelFormat::D24S8) {
        return vk::Format::eD24UnormS8Uint;
    } else if (format == PixelFormat::Invalid) {
        LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
        return vk::Format::eUndefined;
    }

    // Remaining texture formats are decoded to RGBA8 before upload.
    return vk::Format::eR8G8B8A8Unorm;
}
|
||||
|
||||
/// Returns the Vulkan image aspect flags matching a surface type.
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
    using VideoCore::SurfaceType;
    if (type == SurfaceType::Color || type == SurfaceType::Texture ||
        type == SurfaceType::Fill) {
        return vk::ImageAspectFlagBits::eColor;
    }
    if (type == SurfaceType::Depth) {
        return vk::ImageAspectFlagBits::eDepth;
    }
    if (type == SurfaceType::DepthStencil) {
        return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
    }

    UNREACHABLE_MSG("Invalid surface type!");
    return vk::ImageAspectFlagBits::eColor;
}
|
||||
|
||||
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
|
||||
// Creates one staging buffer per scheduler slot for CPU <-> GPU texel
// transfers; each can serve as both a transfer source and destination.
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler)
    : instance{instance}, scheduler{scheduler} {

    for (auto& buffer : staging_buffers) {
        buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
                                                 vk::BufferUsageFlagBits::eTransferSrc |
                                                 vk::BufferUsageFlagBits::eTransferDst);
    }
}
|
||||
|
||||
// Returns a view into the current slot's staging buffer for a transfer of
// `size` bytes, starting at the slot's current arena offset.
// NOTE(review): `upload` is unused, and staging_offsets is read here but never
// advanced — unless a caller bumps the offset, successive requests within the
// same slot will alias the same staging memory. Confirm who owns the bump.
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
    const u32 current_slot = scheduler.GetCurrentSlotIndex();
    const u32 offset = staging_offsets[current_slot];
    // Fail loudly rather than silently corrupting adjacent staging data.
    if (offset + size > STAGING_BUFFER_SIZE) {
        LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
        UNREACHABLE();
    }

    const auto& buffer = staging_buffers[current_slot];
    return StagingData{
        .buffer = buffer->buffer,
        .mapped = buffer->mapped.subspan(offset, size),
        .buffer_offset = offset
    };
}
|
||||
|
||||
// Called when the scheduler rotates to a new slot: the slot's previous
// submissions have completed, so its staging arena can be reused from zero.
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
    staging_offsets[new_slot] = 0;
}
|
||||
|
||||
// Creates (or recycles) a device-local image and view for the given
// dimensions, pixel format and texture type (2D or cube map).
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                                    VideoCore::TextureType type) {

    const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
    const VideoCore::HostTextureTag key = {
        .format = format,
        .width = width,
        .height = height,
        .layers = layers
    };

    // Attempt to recycle an unused allocation
    if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
        ImageAlloc alloc = std::move(it->second);
        texture_recycler.erase(it);
        return alloc;
    }

    // Create a new allocation. The instance may substitute a device-supported
    // alternative for the requested format.
    vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
    vk::ImageAspectFlags aspect = GetImageAspect(vk_format);

    // The image is created with a full mip chain so mipmaps can be generated.
    const vk::ImageCreateInfo image_info = {
        .flags = type == VideoCore::TextureType::CubeMap ?
                 vk::ImageCreateFlagBits::eCubeCompatible :
                 vk::ImageCreateFlags{},
        .imageType = vk::ImageType::e2D,
        .format = vk_format,
        .extent = {width, height, 1},
        .mipLevels = std::bit_width(std::max(width, height)),
        .arrayLayers = layers,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = GetImageUsage(aspect),
    };

    const VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
    };

    // VMA speaks the C API, so drop to VkImage/VkImageCreateInfo for creation.
    VkImage unsafe_image{};
    VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
    VmaAllocation allocation;

    VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
                                     &unsafe_image, &allocation, nullptr);
    if (result != VK_SUCCESS) {
        LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
        UNREACHABLE();
    }

    vk::Image image = vk::Image{unsafe_image};

    // NOTE(review): the view exposes only mip level 0 and a single layer even
    // though the image has a full mip chain (and 6 layers for cube maps) —
    // confirm this is intentional.
    const vk::ImageViewCreateInfo view_info = {
        .image = image,
        .viewType = type == VideoCore::TextureType::CubeMap ?
                    vk::ImageViewType::eCube :
                    vk::ImageViewType::e2D,
        .format = vk_format,
        .subresourceRange = {
            .aspectMask = aspect,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1
        }
    };

    vk::Device device = instance.GetDevice();
    vk::ImageView image_view = device.createImageView(view_info);

    return ImageAlloc{
        .image = image,
        .image_view = image_view,
        .allocation = allocation,
    };
}
|
||||
|
||||
// Clears one mip level of the surface. Full-rect clears use the dedicated
// vkCmdClear* commands; partial clears are not implemented yet.
// Always returns true (the clear is considered handled either way).
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                                  VideoCore::ClearValue value) {
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    // Clear commands require the image in TRANSFER_DST_OPTIMAL layout.
    surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1);

    // For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
    if (clear.texture_rect == surface.GetScaledRect()) {
        vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
        if (aspect & vk::ImageAspectFlagBits::eColor) {
            const vk::ClearColorValue clear_color = {
                .float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})
            };

            const vk::ImageSubresourceRange range = {
                .aspectMask = aspect,
                .baseMipLevel = clear.texture_level,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            };

            command_buffer.clearColorImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
                                           clear_color, range);
        } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
            const vk::ClearDepthStencilValue clear_depth = {
                .depth = value.depth,
                .stencil = value.stencil
            };

            const vk::ImageSubresourceRange range = {
                .aspectMask = aspect,
                .baseMipLevel = clear.texture_level,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            };

            command_buffer.clearDepthStencilImage(surface.image, vk::ImageLayout::eTransferDstOptimal,
                                                  clear_depth, range);
        }
    } else {
        LOG_WARNING(Render_Vulkan, "Partial clears are unimplemented!");
    }

    return true;
}
|
||||
|
||||
// Records a GPU image-to-image copy between two surfaces at the levels and
// offsets described by `copy`. Always returns true.
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
    const vk::ImageCopy image_copy = {
        .srcSubresource = {
            .aspectMask = ToVkAspect(source.type),
            .mipLevel = copy.src_level,
            .baseArrayLayer = 0,
            .layerCount = 1
        },
        .srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
        .dstSubresource = {
            .aspectMask = ToVkAspect(dest.type),
            .mipLevel = copy.dst_level,
            .baseArrayLayer = 0,
            .layerCount = 1
        },
        .dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
        .extent = {copy.extent.width, copy.extent.height, 1}
    };

    // vkCmdCopyImage requires src/dst in the respective transfer layouts.
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
    dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);

    command_buffer.copyImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
                             dest.image, vk::ImageLayout::eTransferDstOptimal, image_copy);

    return true;
}
|
||||
|
||||
// Records a (possibly scaling) GPU blit from `source` to `dest` with linear
// filtering, using the rects/levels/layers described by `blit`.
// Always returns true.
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) {
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
    dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);

    // Offsets are corner pairs; z spans [0, 1) as required for 2D blits.
    const std::array source_offsets = {
        vk::Offset3D{static_cast<s32>(blit.src_rect.left), static_cast<s32>(blit.src_rect.bottom), 0},
        vk::Offset3D{static_cast<s32>(blit.src_rect.right), static_cast<s32>(blit.src_rect.top), 1}
    };

    const std::array dest_offsets = {
        vk::Offset3D{static_cast<s32>(blit.dst_rect.left), static_cast<s32>(blit.dst_rect.bottom), 0},
        vk::Offset3D{static_cast<s32>(blit.dst_rect.right), static_cast<s32>(blit.dst_rect.top), 1}
    };

    const vk::ImageBlit blit_area = {
        .srcSubresource = {
            .aspectMask = ToVkAspect(source.type),
            .mipLevel = blit.src_level,
            .baseArrayLayer = blit.src_layer,
            .layerCount = 1
        },
        .srcOffsets = source_offsets,
        .dstSubresource = {
            .aspectMask = ToVkAspect(dest.type),
            .mipLevel = blit.dst_level,
            .baseArrayLayer = blit.dst_layer,
            .layerCount = 1
        },
        .dstOffsets = dest_offsets
    };

    command_buffer.blitImage(source.image, vk::ImageLayout::eTransferSrcOptimal,
                             dest.image, vk::ImageLayout::eTransferDstOptimal,
                             blit_area, vk::Filter::eLinear);

    return true;
}
|
||||
|
||||
// Generates the surface's mip chain by repeatedly blitting each level into
// the next with linear filtering.
// NOTE(review): max_level is currently unused; the level count is derived
// from the surface dimensions instead — confirm which is intended.
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
    // TODO: Investigate AMD single pass downsampler
    s32 current_width = surface.GetScaledWidth();
    s32 current_height = surface.GetScaledHeight();

    const u32 levels = std::bit_width(std::max(surface.width, surface.height));
    vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    for (u32 i = 1; i < levels; i++) {
        surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
        surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, i, 1);

        // The destination level is half the source level in each dimension,
        // clamped to 1.
        const s32 next_width = current_width > 1 ? current_width / 2 : 1;
        const s32 next_height = current_height > 1 ? current_height / 2 : 1;

        const std::array source_offsets = {
            vk::Offset3D{0, 0, 0},
            vk::Offset3D{current_width, current_height, 1}
        };

        const std::array dest_offsets = {
            vk::Offset3D{0, 0, 0},
            vk::Offset3D{next_width, next_height, 1}
        };

        const vk::ImageBlit blit_area = {
            .srcSubresource = {
                .aspectMask = aspect,
                .mipLevel = i - 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .srcOffsets = source_offsets,
            .dstSubresource = {
                .aspectMask = aspect,
                .mipLevel = i,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .dstOffsets = dest_offsets
        };

        command_buffer.blitImage(surface.image, vk::ImageLayout::eTransferSrcOptimal,
                                 surface.image, vk::ImageLayout::eTransferDstOptimal,
                                 blit_area, vk::Filter::eLinear);

        // Bug fix: advance to the next level's dimensions. Previously the
        // loop never halved current_width/current_height, so every iteration
        // blitted from the base level's extent — wrong for levels >= 2.
        current_width = next_width;
        current_height = next_height;
    }
}
|
||||
|
||||
// Creates the backing Vulkan image/view for the surface via the runtime's
// allocator (which may recycle a previous allocation of the same shape).
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
    : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
      scheduler{runtime.GetScheduler()} {
    const ImageAlloc alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(),
                                              params.pixel_format, texture_type);

    allocation = alloc.allocation;
    image_view = alloc.image_view;
    image = alloc.image;
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
// Copies texel data from the staging buffer into the surface image at the
// requested mip level. Scaled surfaces take the (stubbed) ScaledUpload path.
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Upload);

    const bool is_scaled = res_scale != 1;
    if (is_scaled) {
        ScaledUpload(upload);
    } else {
        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        const VideoCore::Rect2D rect = upload.texture_rect;
        const vk::BufferImageCopy copy_region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = rect.GetWidth(),
            .bufferImageHeight = rect.GetHeight(),
            .imageSubresource = {
                .aspectMask = aspect,
                .mipLevel = upload.texture_level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
            .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
        };

        // The copy requires the image in TRANSFER_DST_OPTIMAL layout.
        TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
        command_buffer.copyBufferToImage(staging.buffer, image,
                                         vk::ImageLayout::eTransferDstOptimal,
                                         copy_region);
    }

    InvalidateAllWatcher();
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
|
||||
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Download);
|
||||
|
||||
const bool is_scaled = res_scale != 1;
|
||||
if (is_scaled) {
|
||||
ScaledDownload(download);
|
||||
} else {
|
||||
u32 region_count = 0;
|
||||
std::array<vk::BufferImageCopy, 2> copy_regions;
|
||||
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
const VideoCore::Rect2D rect = download.texture_rect;
|
||||
vk::BufferImageCopy copy_region = {
|
||||
.bufferOffset = staging.buffer_offset,
|
||||
.bufferRowLength = rect.GetWidth(),
|
||||
.bufferImageHeight = rect.GetHeight(),
|
||||
.imageSubresource = {
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = download.texture_level,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1
|
||||
},
|
||||
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
|
||||
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
|
||||
};
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eColor) {
|
||||
copy_regions[region_count++] = copy_region;
|
||||
} else if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
|
||||
copy_regions[region_count++] = copy_region;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eStencil) {
|
||||
copy_region.bufferOffset += staging.mapped.size();
|
||||
copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
|
||||
copy_regions[region_count++] = copy_region;
|
||||
}
|
||||
}
|
||||
|
||||
TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
|
||||
|
||||
// Copy pixel data to the staging buffer
|
||||
command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
staging.buffer, region_count, copy_regions.data());
|
||||
|
||||
scheduler.Submit(true);
|
||||
}
|
||||
}
|
||||
|
||||
// Download path for surfaces with res_scale != 1.
// Currently unimplemented: the body below is a commented-out transcription of
// the OpenGL implementation, kept for reference while porting to Vulkan.
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
    /*const u32 rect_width = download.texture_rect.GetWidth();
    const u32 rect_height = download.texture_rect.GetHeight();

    // Allocate an unscaled texture that fits the download rectangle to use as a blit destination
    const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
                                                     VideoCore::TextureType::Texture2D);
    runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
    runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);

    // Blit the scaled rectangle to the unscaled texture
    const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
    glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
                      0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);

    const auto& tuple = runtime.GetFormatTuple(pixel_format);
    if (driver.IsOpenGLES()) {
        const auto& downloader_es = runtime.GetDownloaderES();
        downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
                                  rect_height, rect_width,
                                  reinterpret_cast<void*>(download.buffer_offset));
    } else {
        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
                      reinterpret_cast<void*>(download.buffer_offset));
    }*/
}
|
||||
|
||||
// Upload path for surfaces with res_scale != 1.
// Currently unimplemented: the body below is a commented-out transcription of
// the OpenGL implementation, kept for reference while porting to Vulkan.
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
    /*const u32 rect_width = upload.texture_rect.GetWidth();
    const u32 rect_height = upload.texture_rect.GetHeight();

    OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
                                               VideoCore::TextureType::Texture2D);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);

    glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
                    tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));

    const auto scaled_rect = upload.texture_rect * res_scale;
    const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
    const auto& filterer = runtime.GetFilterer();
    if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
        runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
        runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);

        // If filtering fails, resort to normal blitting
        glBlitFramebuffer(0, 0, rect_width, rect_height,
                          upload.texture_rect.left, upload.texture_rect.bottom,
                          upload.texture_rect.right, upload.texture_rect.top,
                          MakeBufferMask(type), GL_LINEAR);
    }*/
}
|
||||
|
||||
// Records a pipeline barrier transitioning mip levels [level, level + level_count)
// of this surface's image from the currently tracked layout to new_layout.
// No-op when the image is already in new_layout.
void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
                               u32 level, u32 level_count) {
    if (new_layout == layout) {
        return;
    }

    // Access mask + pipeline stage pair describing how a given layout is used
    struct LayoutInfo {
        vk::AccessFlags access;
        vk::PipelineStageFlags stage;
    };

    // Get optimal transition settings for every image layout. Settings taken from Dolphin
    auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
        // Value-initialize so an unhandled layout can never return garbage flags
        // when UNREACHABLE() compiles to a no-op in release builds.
        LayoutInfo info{};
        switch (layout) {
        case vk::ImageLayout::eUndefined:
            // Layout undefined therefore contents undefined, and we don't care what happens to it.
            info.access = vk::AccessFlagBits::eNone;
            info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
            break;
        case vk::ImageLayout::ePreinitialized:
            // Image has been pre-initialized by the host, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eHostWrite;
            info.stage = vk::PipelineStageFlagBits::eHost;
            break;
        case vk::ImageLayout::eColorAttachmentOptimal:
            // Image was being used as a color attachment, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eColorAttachmentRead |
                          vk::AccessFlagBits::eColorAttachmentWrite;
            info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
            break;
        case vk::ImageLayout::eDepthStencilAttachmentOptimal:
            // Image was being used as a depthstencil attachment, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
                          vk::AccessFlagBits::eDepthStencilAttachmentWrite;
            info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
                         vk::PipelineStageFlagBits::eLateFragmentTests;
            break;
        case vk::ImageLayout::ePresentSrcKHR:
            info.access = vk::AccessFlagBits::eNone;
            info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
            break;
        case vk::ImageLayout::eShaderReadOnlyOptimal:
            // Image was being used as a shader resource, make sure all reads have finished.
            info.access = vk::AccessFlagBits::eShaderRead;
            info.stage = vk::PipelineStageFlagBits::eFragmentShader;
            break;
        case vk::ImageLayout::eTransferSrcOptimal:
            // Image was being used as a copy source, ensure all reads have finished.
            info.access = vk::AccessFlagBits::eTransferRead;
            info.stage = vk::PipelineStageFlagBits::eTransfer;
            break;
        case vk::ImageLayout::eTransferDstOptimal:
            // Image was being used as a copy destination, ensure all writes have finished.
            info.access = vk::AccessFlagBits::eTransferWrite;
            info.stage = vk::PipelineStageFlagBits::eTransfer;
            break;
        default:
            LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}", layout);
            // Conservative full barrier so release builds (where UNREACHABLE is a
            // no-op) still produce a correct, if slow, transition.
            info.access = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite;
            info.stage = vk::PipelineStageFlagBits::eAllCommands;
            UNREACHABLE();
        }

        return info;
    };

    const LayoutInfo source = GetLayoutInfo(layout);
    const LayoutInfo dest = GetLayoutInfo(new_layout);

    const vk::ImageMemoryBarrier barrier = {
        .srcAccessMask = source.access,
        .dstAccessMask = dest.access,
        .oldLayout = layout,
        .newLayout = new_layout,
        // No queue family ownership transfer is intended; be explicit instead of
        // relying on the zero-initialized indices from designated init.
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {aspect, level, level_count, 0, 1}
    };

    command_buffer.pipelineBarrier(source.stage, dest.stage,
                                   vk::DependencyFlagBits::eByRegion,
                                   {}, {}, barrier);

    // NOTE(review): a single layout is tracked for the whole image even though only
    // the [level, level + level_count) subrange was transitioned here — confirm
    // callers keep every mip level in lockstep.
    layout = new_layout;
}
|
||||
|
||||
} // namespace Vulkan
|
129
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
129
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
@ -0,0 +1,129 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <array>
#include <memory>
#include <set>
#include <span>
#include <unordered_map>
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Describes a region of the internal staging buffer handed out by
/// TextureRuntime::FindStaging for pixel uploads/downloads.
struct StagingData {
    vk::Buffer buffer;              // Staging buffer the region belongs to
    std::span<std::byte> mapped{};  // Host-visible mapping of the region (presumably; see FindStaging)
    u32 buffer_offset = 0;          // Byte offset of the region inside `buffer`
};
|
||||
|
||||
/// Bundles a Vulkan image with its default view and backing VMA allocation,
/// so the trio can be recycled/destroyed as a unit.
struct ImageAlloc {
    vk::Image image;
    vk::ImageView image_view;
    VmaAllocation allocation;
};
|
||||
|
||||
class Instance;
|
||||
class Surface;
|
||||
|
||||
/**
|
||||
* Provides texture manipulation functions to the rasterizer cache
|
||||
* Separating this into a class makes it easier to abstract graphics API code
|
||||
*/
|
||||
class TextureRuntime {
    friend class Surface;
public:
    TextureRuntime(const Instance& instance, TaskScheduler& scheduler);
    ~TextureRuntime() = default;

    /// Maps an internal staging buffer region of the provided size for pixel uploads/downloads
    StagingData FindStaging(u32 size, bool upload);

    /// Performs operations that need to be done on every scheduler slot switch
    void OnSlotSwitch(u32 new_slot);

    /// Fills the rectangle of the texture with the clear value provided
    bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                      VideoCore::ClearValue value);

    /// Copies a rectangle of the source surface to a rectangle of the destination surface
    bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);

    /// Blits a rectangle of the source surface to a rectangle of the destination surface
    bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);

    /// Generates mipmaps for all the available levels of the texture
    void GenerateMipmaps(Surface& surface, u32 max_level);

private:
    /// Allocates a vulkan image, possibly reusing an existing one from the recycler
    ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                        VideoCore::TextureType type);

    /// Returns the Vulkan instance wrapper
    const Instance& GetInstance() const {
        return instance;
    }

    /// Returns the scheduler used to record/submit GPU work
    TaskScheduler& GetScheduler() const {
        return scheduler;
    }

private:
    const Instance& instance;
    TaskScheduler& scheduler;
    // One staging buffer (and write offset) per scheduler command slot
    std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
    std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
    // Images available for reuse by Allocate, keyed by their allocation parameters
    // (presumably filled when surfaces are destroyed — TODO confirm)
    std::unordered_map<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
};
|
||||
|
||||
/// A cached surface backed by a Vulkan image, managed by the rasterizer cache.
class Surface : public VideoCore::SurfaceBase<Surface> {
    friend class TextureRuntime;
public:
    Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
    ~Surface() override = default;

    /// Uploads pixel data in staging to a rectangle region of the surface texture
    void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);

    /// Downloads pixel data to staging from a rectangle region of the surface texture
    void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);

private:
    /// Downloads scaled image by downscaling the requested rectangle
    void ScaledDownload(const VideoCore::BufferTextureCopy& download);

    /// Uploads pixel data to scaled texture
    void ScaledUpload(const VideoCore::BufferTextureCopy& upload);

    /// Overrides the tracked image layout of the mip level range
    void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);

    /// Transitions the mip level range of the surface to new_layout
    void TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
                          u32 level, u32 level_count);

private:
    TextureRuntime& runtime;
    const Instance& instance;
    TaskScheduler& scheduler;

    vk::Image image{};
    vk::ImageView image_view{};
    VmaAllocation allocation = nullptr;
    vk::Format internal_format = vk::Format::eUndefined;
    vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
    // NOTE(review): one layout is tracked for the whole image even though
    // TransitionLevels accepts a mip subrange — confirm callers keep levels in sync.
    vk::ImageLayout layout = vk::ImageLayout::eUndefined;
};
|
||||
|
||||
struct Traits {
|
||||
using Runtime = TextureRuntime;
|
||||
using Surface = Surface;
|
||||
};
|
||||
|
||||
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
|
||||
|
||||
} // namespace Vulkan
|
97
src/video_core/shader/shader_cache.h
Normal file
97
src/video_core/shader/shader_cache.h
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Pica::Shader {
|
||||
|
||||
template <typename ShaderType>
|
||||
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
|
||||
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
|
||||
std::string(*CodeGenerator)(const KeyType&)>
|
||||
class ShaderCache {
|
||||
public:
|
||||
ShaderCache() {}
|
||||
~ShaderCache() = default;
|
||||
|
||||
/// Returns a shader handle generated from the provided config
|
||||
template <typename... Args>
|
||||
auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult<ShaderType> {
|
||||
auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
|
||||
auto& shader = iter->second;
|
||||
|
||||
if (new_shader) {
|
||||
std::string code = CodeGenerator(config);
|
||||
shader = ModuleCompiler(code, args...);
|
||||
return std::make_pair(shader, code);
|
||||
}
|
||||
|
||||
return std::make_pair(shader, std::nullopt);
|
||||
}
|
||||
|
||||
void Inject(const KeyType& key, ShaderType&& shader) {
|
||||
shaders.emplace(key, std::move(shader));
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<KeyType, ShaderType> shaders;
|
||||
};
|
||||
|
||||
/**
|
||||
* This is a cache designed for shaders translated from PICA shaders. The first cache matches the
|
||||
* config structure like a normal cache does. On cache miss, the second cache matches the generated
|
||||
* GLSL code. The configuration is like this because there might be leftover code in the PICA shader
|
||||
* program buffer from the previous shader, which is hashed into the config, resulting in several
|
||||
* different config values from the same shader program.
|
||||
*/
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
|
||||
std::optional<std::string>(*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)>
|
||||
class ShaderDoubleCache {
|
||||
public:
|
||||
ShaderDoubleCache() = default;
|
||||
~ShaderDoubleCache() = default;
|
||||
|
||||
template <typename... Args>
|
||||
auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) -> ShaderCacheResult<ShaderType> {
|
||||
if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
|
||||
auto code = CodeGenerator(setup, key);
|
||||
if (!code) {
|
||||
shader_map[key] = nullptr;
|
||||
return std::make_pair(ShaderType{}, std::nullopt);
|
||||
}
|
||||
|
||||
std::string& program = code.value();
|
||||
auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
|
||||
auto& shader = iter->second;
|
||||
|
||||
if (new_shader) {
|
||||
shader = ModuleCompiler(program, args...);
|
||||
}
|
||||
|
||||
shader_map[key] = &shader;
|
||||
return std::make_pair(shader, std::move(program));
|
||||
} else {
|
||||
return std::make_pair(*map_iter->second, std::nullopt);
|
||||
}
|
||||
}
|
||||
|
||||
void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
|
||||
const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
|
||||
|
||||
auto& cached_shader = iter->second;
|
||||
shader_map.insert_or_assign(key, &cached_shader);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<KeyType, ShaderType*> shader_map;
|
||||
std::unordered_map<std::string, ShaderType> shader_cache;
|
||||
};
|
||||
|
||||
} // namespace Pica::Shader
|
Reference in New Issue
Block a user