diff --git a/.gitmodules b/.gitmodules index b45d17bdb..3e33bd5a1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "vulkan-headers"] path = externals/vulkan-headers url = https://github.com/KhronosGroup/Vulkan-Headers +[submodule "glslang"] + path = externals/glslang + url = https://github.com/KhronosGroup/glslang diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index c2385c48e..5fc537ab6 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -60,6 +60,9 @@ endif() # Glad add_subdirectory(glad) +# glslang +add_subdirectory(glslang) + # inih add_subdirectory(inih) diff --git a/externals/glslang b/externals/glslang new file mode 160000 index 000000000..c0cf8ad87 --- /dev/null +++ b/externals/glslang @@ -0,0 +1 @@ +Subproject commit c0cf8ad87627227c7f1d3be6177bbf2832d3fc1b diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 0bd461808..b95a9cd6e 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -299,6 +299,40 @@ public: } }; +static Frontend::WindowSystemType GetWindowSystemType() { + // Determine WSI type based on Qt platform. + QString platform_name = QGuiApplication::platformName(); + if (platform_name == QStringLiteral("windows")) + return Frontend::WindowSystemType::Windows; + else if (platform_name == QStringLiteral("xcb")) + return Frontend::WindowSystemType::X11; + else if (platform_name == QStringLiteral("wayland")) + return Frontend::WindowSystemType::Wayland; + + LOG_CRITICAL(Frontend, "Unknown Qt platform!"); + return Frontend::WindowSystemType::Windows; +} + +static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { + Frontend::EmuWindow::WindowSystemInfo wsi; + wsi.type = GetWindowSystemType(); + + // Our Win32 Qt external doesn't have the private API. +#if defined(WIN32) || defined(__APPLE__) + wsi.render_surface = window ? 
reinterpret_cast<void*>(window->winId()) : nullptr; +#else + QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); + wsi.display_connection = pni->nativeResourceForWindow("display", window); + if (wsi.type == Frontend::WindowSystemType::Wayland) + wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; + else + wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; +#endif + wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; + + return wsi; +} + GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread) : QWidget(parent_), emu_thread(emu_thread) { @@ -532,6 +566,9 @@ bool GRenderWindow::InitRenderTarget() { break; } + // Update the Window System information with the new render target + window_info = GetWindowSystemInfo(child_widget->windowHandle()); + child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight); layout()->addWidget(child_widget); diff --git a/src/common/hash.h b/src/common/hash.h index 079039cfc..44da9e25d 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -6,6 +6,7 @@ #include #include +#include #include "common/cityhash.h" #include "common/common_types.h" @@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) { return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); } +template <typename T> +struct IdentityHash { + T operator()(const T& value) const { + return value; + } +}; + /// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
template struct HashableStruct { diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 59862537b..7997137ff 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" #include "common/logging/formatter.h" + namespace Log { // trims up to and including the last of ../, ..\, src/, src\ in a string @@ -102,6 +103,7 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend + Render_Vulkan, ///< Vulkan backend Audio, ///< Audio emulation Audio_DSP, ///< The HLE and LLE implementations of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index add704207..e3bd62d8b 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -12,6 +12,15 @@ namespace Frontend { +/// Information for the Graphics Backends signifying what type of screen pointer is in +/// WindowInformation +enum class WindowSystemType { + Headless, + Windows, + X11, + Wayland, +}; + struct Frame; /** * For smooth Vsync rendering, we want to always present the latest frame that the core generates, @@ -117,6 +126,23 @@ public: std::pair min_client_area_size; }; + /// Data describing host window system information + struct WindowSystemInfo { + // Window system type. Determines which GL context or Vulkan WSI is used. + WindowSystemType type = WindowSystemType::Headless; + + // Connection to a display server. This is used on X11 and Wayland platforms. + void* display_connection = nullptr; + + // Render surface. This is a pointer to the native window handle, which depends + // on the platform. e.g. HWND for Windows, Window for X11. If the surface is + // set to nullptr, the video backend will run in headless mode. + void* render_surface = nullptr; + + // Scale of the render surface. 
For hidpi systems, this will be >1. + float render_surface_scale = 1.0f; + }; + /// Polls window events virtual void PollEvents() = 0; @@ -180,6 +206,13 @@ public: config = val; } + /** + * Returns system information about the drawing area. + */ + const WindowSystemInfo& GetWindowInfo() const { + return window_info; + } + /** * Gets the framebuffer layout (width, height, and screen regions) * @note This method is thread-safe @@ -226,6 +259,8 @@ protected: framebuffer_layout = layout; } + WindowSystemInfo window_info; + private: /** * Handler called when the minimal client area was requested to be changed via SetConfig. diff --git a/src/core/settings.h b/src/core/settings.h index edfba8277..e34611d06 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -169,7 +169,7 @@ struct Values { u64 init_time; // Renderer - GraphicsAPI graphics_api = GraphicsAPI::OpenGL; + GraphicsAPI graphics_api = GraphicsAPI::Vulkan; bool use_hw_renderer; bool use_hw_shader; bool separable_shader; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cd21764b6..8ec0c8942 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -82,9 +82,32 @@ add_library(video_core STATIC #temporary, move these back in alphabetical order before merging renderer_opengl/gl_format_reinterpreter.cpp renderer_opengl/gl_format_reinterpreter.h + renderer_vulkan/pica_to_vk.h + renderer_vulkan/vk_common.cpp + renderer_vulkan/vk_common.h + renderer_vulkan/vk_instance.cpp + renderer_vulkan/vk_instance.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_platform.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h + renderer_vulkan/vk_shader_gen.cpp + renderer_vulkan/vk_shader_gen.h + renderer_vulkan/vk_shader.cpp + renderer_vulkan/vk_shader.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + 
renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_task_scheduler.cpp + renderer_vulkan/vk_task_scheduler.h + renderer_vulkan/vk_texture_runtime.cpp + renderer_vulkan/vk_texture_runtime.h shader/debug_data.h shader/shader.cpp shader/shader.h + shader/shader_cache.h shader/shader_interpreter.cpp shader/shader_interpreter.h swrasterizer/clipper.cpp @@ -156,8 +179,11 @@ endif() create_target_directory_groups(video_core) +# Include Vulkan headers +target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include) +target_include_directories(video_core PRIVATE ../../externals/vma) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization) +target_link_libraries(video_core PRIVATE glad glslang nihstro-headers Boost::serialization) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if (ARCHITECTURE_x86_64) diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 94b9f7502..ee4835de8 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -6,8 +6,7 @@ #include #include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" +#include "common/vector_math.h" #include "video_core/pica_types.h" namespace Pica { @@ -18,7 +17,7 @@ struct RasterizerRegs { KeepAll = 0, KeepClockWise = 1, KeepCounterClockWise = 2, - // TODO: What does the third value imply? 
+ KeepAll2 = 3 }; union { diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index d5202169f..7b91fbd58 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -243,17 +243,12 @@ private: }; FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { - const std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - const std::string_view version{reinterpret_cast(glGetString(GL_VERSION))}; - auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { const u32 dst_index = static_cast(dest); return reinterpreters[dst_index].push_back(std::move(obj)); }; Register(VideoCore::PixelFormat::RGBA8, std::make_unique()); - LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); - Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index ec6625731..c73f0f8d5 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -302,9 +302,9 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); } -MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); +MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64)); void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureUL); + MICROPROFILE_SCOPE(OpenGL_Upload); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); @@ -339,9 +339,9 @@ void Surface::Upload(const 
VideoCore::BufferTextureCopy& upload, const StagingBu InvalidateAllWatcher(); } -MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); +MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64)); void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureDL); + MICROPROFILE_SCOPE(OpenGL_Download); // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h new file mode 100644 index 000000000..6cbf3ecfa --- /dev/null +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -0,0 +1,278 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "common/logging/log.h" +#include "core/core.h" +#include "video_core/regs.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace PicaToVK { + +using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter; + +struct FilterInfo { + vk::Filter mag_filter, min_filter; + vk::SamplerMipmapMode mip_mode; +}; + +inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) { + constexpr std::array filter_table = { + vk::Filter::eNearest, + vk::Filter::eLinear + }; + + constexpr std::array mipmap_table = { + vk::SamplerMipmapMode::eNearest, + vk::SamplerMipmapMode::eLinear + }; + + return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)}; +} + +inline vk::Filter TextureFilterMode(TextureFilter mode) { + switch (mode) { + case TextureFilter::Linear: + return vk::Filter::eLinear; + case TextureFilter::Nearest: + return vk::Filter::eNearest; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode); + 
UNIMPLEMENTED(); + } + + return vk::Filter::eLinear; +} + +inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) { + switch (mip) { + case TextureFilter::Linear: + return vk::SamplerMipmapMode::eLinear; + case TextureFilter::Nearest: + return vk::SamplerMipmapMode::eNearest; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip); + UNIMPLEMENTED(); + } + + return vk::SamplerMipmapMode::eLinear; +} + +inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { + static constexpr std::array wrap_mode_table{{ + vk::SamplerAddressMode::eClampToEdge, + vk::SamplerAddressMode::eClampToBorder, + vk::SamplerAddressMode::eRepeat, + vk::SamplerAddressMode::eMirroredRepeat, + // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the + // comments in enum WrapMode. + vk::SamplerAddressMode::eClampToEdge, + vk::SamplerAddressMode::eClampToBorder, + vk::SamplerAddressMode::eRepeat, + vk::SamplerAddressMode::eRepeat, + }}; + + const auto index = static_cast(mode); + + // Range check table for input + if (index >= wrap_mode_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", index); + UNREACHABLE(); + + return vk::SamplerAddressMode::eClampToEdge; + } + + if (index > 3) { + Core::System::GetInstance().TelemetrySession().AddField( + Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode", + static_cast(index)); + LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index); + } + + return wrap_mode_table[index]; +} + +inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { + static constexpr std::array blend_equation_table{{ + vk::BlendOp::eAdd, + vk::BlendOp::eSubtract, + vk::BlendOp::eReverseSubtract, + vk::BlendOp::eMin, + vk::BlendOp::eMax, + }}; + + const auto index = static_cast(equation); + + // Range check table for input + if (index >= blend_equation_table.size()) { + 
LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", index); + + // This return value is hwtested, not just a stub + return vk::BlendOp::eAdd; + } + + return blend_equation_table[index]; +} + +inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) { + static constexpr std::array blend_func_table{{ + vk::BlendFactor::eZero, // BlendFactor::Zero + vk::BlendFactor::eOne, // BlendFactor::One + vk::BlendFactor::eSrcColor, // BlendFactor::SourceColor + vk::BlendFactor::eOneMinusSrcColor, // BlendFactor::OneMinusSourceColor + vk::BlendFactor::eDstColor, // BlendFactor::DestColor + vk::BlendFactor::eOneMinusDstColor, // BlendFactor::OneMinusDestColor + vk::BlendFactor::eSrcAlpha, // BlendFactor::SourceAlpha + vk::BlendFactor::eOneMinusSrcAlpha, // BlendFactor::OneMinusSourceAlpha + vk::BlendFactor::eDstAlpha, // BlendFactor::DestAlpha + vk::BlendFactor::eOneMinusDstAlpha, // BlendFactor::OneMinusDestAlpha + vk::BlendFactor::eConstantColor, // BlendFactor::ConstantColor + vk::BlendFactor::eOneMinusConstantColor,// BlendFactor::OneMinusConstantColor + vk::BlendFactor::eConstantAlpha, // BlendFactor::ConstantAlpha + vk::BlendFactor::eOneMinusConstantAlpha,// BlendFactor::OneMinusConstantAlpha + vk::BlendFactor::eSrcAlphaSaturate, // BlendFactor::SourceAlphaSaturate + }}; + + const auto index = static_cast(factor); + + // Range check table for input + if (index >= blend_func_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", index); + UNREACHABLE(); + + return vk::BlendFactor::eOne; + } + + return blend_func_table[index]; +} + +inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) { + static constexpr std::array logic_op_table{{ + vk::LogicOp::eClear, // Clear + vk::LogicOp::eAnd, // And + vk::LogicOp::eAndReverse, // AndReverse + vk::LogicOp::eCopy, // Copy + vk::LogicOp::eSet, // Set + vk::LogicOp::eCopyInverted, // CopyInverted + vk::LogicOp::eNoOp, // NoOp + vk::LogicOp::eInvert, // Invert + vk::LogicOp::eNand, // Nand 
+ vk::LogicOp::eOr, // Or + vk::LogicOp::eNor, // Nor + vk::LogicOp::eXor, // Xor + vk::LogicOp::eEquivalent, // Equiv + vk::LogicOp::eAndInverted, // AndInverted + vk::LogicOp::eOrReverse, // OrReverse + vk::LogicOp::eOrInverted, // OrInverted + }}; + + const auto index = static_cast(op); + + // Range check table for input + if (index >= logic_op_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", index); + UNREACHABLE(); + + return vk::LogicOp::eCopy; + } + + return logic_op_table[index]; +} + +inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) { + static constexpr std::array compare_func_table{{ + vk::CompareOp::eNever, // CompareFunc::Never + vk::CompareOp::eAlways, // CompareFunc::Always + vk::CompareOp::eEqual, // CompareFunc::Equal + vk::CompareOp::eNotEqual, // CompareFunc::NotEqual + vk::CompareOp::eLess, // CompareFunc::LessThan + vk::CompareOp::eLessOrEqual, // CompareFunc::LessThanOrEqual + vk::CompareOp::eGreater, // CompareFunc::GreaterThan + vk::CompareOp::eGreaterOrEqual, // CompareFunc::GreaterThanOrEqual + }}; + + const auto index = static_cast(func); + + // Range check table for input + if (index >= compare_func_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", index); + UNREACHABLE(); + + return vk::CompareOp::eAlways; + } + + return compare_func_table[index]; +} + +inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) { + static constexpr std::array stencil_op_table{{ + vk::StencilOp::eKeep, // StencilAction::Keep + vk::StencilOp::eZero, // StencilAction::Zero + vk::StencilOp::eReplace, // StencilAction::Replace + vk::StencilOp::eIncrementAndClamp, // StencilAction::Increment + vk::StencilOp::eDecrementAndClamp, // StencilAction::Decrement + vk::StencilOp::eInvert, // StencilAction::Invert + vk::StencilOp::eIncrementAndWrap, // StencilAction::IncrementWrap + vk::StencilOp::eDecrementAndWrap, // StencilAction::DecrementWrap + }}; + + const auto index = 
static_cast(action); + + // Range check table for input + if (index >= stencil_op_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", index); + UNREACHABLE(); + + return vk::StencilOp::eKeep; + } + + return stencil_op_table[index]; +} + +inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) { + switch (topology) { + case Pica::PipelineRegs::TriangleTopology::Fan: + return vk::PrimitiveTopology::eTriangleFan; + case Pica::PipelineRegs::TriangleTopology::List: + case Pica::PipelineRegs::TriangleTopology::Shader: + return vk::PrimitiveTopology::eTriangleList; + case Pica::PipelineRegs::TriangleTopology::Strip: + return vk::PrimitiveTopology::eTriangleStrip; + } +} + +inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { + switch (mode) { + case Pica::RasterizerRegs::CullMode::KeepAll: + case Pica::RasterizerRegs::CullMode::KeepAll2: + return vk::CullModeFlagBits::eNone; + case Pica::RasterizerRegs::CullMode::KeepClockWise: + case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: + return vk::CullModeFlagBits::eBack; + } +} + +inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { + switch (mode) { + case Pica::RasterizerRegs::CullMode::KeepAll: + case Pica::RasterizerRegs::CullMode::KeepAll2: + case Pica::RasterizerRegs::CullMode::KeepClockWise: + return vk::FrontFace::eCounterClockwise; + case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: + return vk::FrontFace::eClockwise; + } +} + +inline Common::Vec4f ColorRGBA8(const u32 color) { + const auto rgba = + Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF}; + return rgba / 255.0f; +} + +inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) { + return Common::Vec3u{color.r, color.g, color.b} / 255.0f; +} + +} // namespace PicaToVK diff --git a/src/video_core/renderer_vulkan/vk_common.cpp b/src/video_core/renderer_vulkan/vk_common.cpp new file mode
100644 index 000000000..f8377bc8d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_common.cpp @@ -0,0 +1,9 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VMA_IMPLEMENTATION +#include "video_core/renderer_vulkan/vk_common.h" + +// Store the dispatch loader here +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h new file mode 100644 index 000000000..8fcdfcd62 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -0,0 +1,66 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +// Include vulkan-hpp header +#define VK_NO_PROTOTYPES 1 +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 +#include + +// Include Vulkan memory allocator +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1 +#include + +namespace Vulkan { + +/// Return the image aspect associated on the provided format +constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) { + switch (format) { + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD24UnormS8Uint: + case vk::Format::eX8D24UnormPack32: + case vk::Format::eD32SfloatS8Uint: + return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth; + break; + case vk::Format::eD16Unorm: + case vk::Format::eD32Sfloat: + return vk::ImageAspectFlagBits::eDepth; + break; + default: + return vk::ImageAspectFlagBits::eColor; + } +} + +/// Returns a bit mask with the required usage of a format with a particular aspect +constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) { + auto usage = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc; + + if (aspect & 
vk::ImageAspectFlagBits::eDepth) { + return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::ImageUsageFlagBits::eColorAttachment; + } +}; + +/// Returns a bit mask with the required features of a format with a particular aspect +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) { + auto usage = vk::FormatFeatureFlagBits::eSampledImage | + vk::FormatFeatureFlagBits::eTransferDst | + vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst; + + if (aspect & vk::ImageAspectFlagBits::eDepth) { + return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::FormatFeatureFlagBits::eColorAttachment; + } +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp new file mode 100644 index 000000000..371338ffc --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -0,0 +1,292 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +Instance::Instance(Frontend::EmuWindow& window) { + auto window_info = window.GetWindowInfo(); + + // Fetch instance independent function pointers + vk::DynamicLoader dl; + auto vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr"); + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + + // Enable the instance extensions the backend uses + auto extensions = GetInstanceExtensions(window_info.type, true); + + // We require a Vulkan 1.1 driver + const u32 available_version = vk::enumerateInstanceVersion(); + if (available_version < VK_API_VERSION_1_1) { + LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!"); + } + + const vk::ApplicationInfo application_info = { + .pApplicationName = "Citra", + .applicationVersion = VK_MAKE_VERSION(1, 0, 0), + .pEngineName = "Citra Vulkan", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = available_version + }; + + const std::array layers = {"VK_LAYER_KHRONOS_validation"}; + const vk::InstanceCreateInfo instance_info = { + .pApplicationInfo = &application_info, + .enabledLayerCount = static_cast<u32>(layers.size()), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = static_cast<u32>(extensions.size()), + .ppEnabledExtensionNames = extensions.data() + }; + + // Create VkInstance + instance = vk::createInstance(instance_info); + VULKAN_HPP_DEFAULT_DISPATCHER.init(instance); + surface = CreateSurface(instance, window); + + // TODO: GPU select dialog + physical_device = instance.enumeratePhysicalDevices()[0]; + device_limits = physical_device.getProperties().limits; + + // Create logical device + CreateDevice(true); +} + +Instance::~Instance() { + device.waitIdle(); + vmaDestroyAllocator(allocator); + device.destroy(); + instance.destroySurfaceKHR(surface); + 
instance.destroy(); +} + +bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const { + static std::unordered_map supported; + if (auto it = supported.find(format); it != supported.end()) { + return (it->second.optimalTilingFeatures & usage) == usage; + } + + // Cache format properties so we don't have to query the driver all the time + const vk::FormatProperties properties = physical_device.getFormatProperties(format); + supported.insert(std::make_pair(format, properties)); + + return (properties.optimalTilingFeatures & usage) == usage; +} + +vk::Format Instance::GetFormatAlternative(vk::Format format) const { + vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format)); + if (IsFormatSupported(format, features)) { + return format; + } + + // Return the most supported alternative format preferably with the + // same block size according to the Vulkan spec. + // See 43.3. Required Format Support of the Vulkan spec + switch (format) { + case vk::Format::eD24UnormS8Uint: + return vk::Format::eD32SfloatS8Uint; + case vk::Format::eX8D24UnormPack32: + return vk::Format::eD32Sfloat; + case vk::Format::eR5G5B5A1UnormPack16: + return vk::Format::eA1R5G5B5UnormPack16; + case vk::Format::eR8G8B8Unorm: + return vk::Format::eR8G8B8A8Unorm; + case vk::Format::eUndefined: + return vk::Format::eUndefined; + case vk::Format::eR4G4B4A4UnormPack16: + // B4G4R4A4 is not guaranteed by the spec to support attachments + return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16); + default: + LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}", + vk::to_string(format), vk::to_string(features)); + return vk::Format::eR8G8B8A8Unorm; + } +} + +bool Instance::CreateDevice(bool validation_enabled) { + // Determine required extensions and features + auto feature_chain = physical_device.getFeatures2(); + + // Not having geometry shaders or wide lines will cause issues with rendering. 
+ const vk::PhysicalDeviceFeatures available = feature_chain.get<vk::PhysicalDeviceFeatures2>().features; + if (!available.geometryShader && !available.wideLines) { + LOG_WARNING(Render_Vulkan, "Geometry shaders not available! Accelerated rendering not possible!"); + } + + // Enable some common features other emulators like Dolphin use + const vk::PhysicalDeviceFeatures2 features = { + .features = { + .robustBufferAccess = available.robustBufferAccess, + .geometryShader = available.geometryShader, + .sampleRateShading = available.sampleRateShading, + .dualSrcBlend = available.dualSrcBlend, + .logicOp = available.logicOp, + .depthClamp = available.depthClamp, + .largePoints = available.largePoints, + .samplerAnisotropy = available.samplerAnisotropy, + .occlusionQueryPrecise = available.occlusionQueryPrecise, + .fragmentStoresAndAtomics = available.fragmentStoresAndAtomics, + .shaderStorageImageMultisample = available.shaderStorageImageMultisample, + .shaderClipDistance = available.shaderClipDistance + } + }; + + // Enable newer Vulkan features + auto enabled_features = vk::StructureChain{ + features, + //feature_chain.get(), + //feature_chain.get(), + //feature_chain.get() + }; + + auto extension_list = physical_device.enumerateDeviceExtensionProperties(); + if (extension_list.empty()) { + LOG_CRITICAL(Render_Vulkan, "No extensions supported by device."); + return false; + } + + // List available device extensions + for (const auto& extension : extension_list) { + LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName); + } + + // Helper lambda for adding extensions + std::array enabled_extensions; + u32 enabled_extension_count = 0; + + auto AddExtension = [&](std::string_view name, bool required) -> bool { + auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) { + return name.compare(prop.extensionName.data()) == 0; + }); + + if (result != extension_list.end()) { + LOG_INFO(Render_Vulkan, "Enabling extension: {}", name); + 
enabled_extensions[enabled_extension_count++] = name.data(); + return true; + } + + if (required) { + LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name); + } + + return false; + }; + + // Add required extensions + AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true); + + // Check for optional features + //dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); + //extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); + //push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); + + // Search queue families for graphics and present queues + auto family_properties = physical_device.getQueueFamilyProperties(); + if (family_properties.empty()) { + LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues."); + return false; + } + + graphics_queue_family_index = -1; + present_queue_family_index = -1; + for (int i = 0; i < family_properties.size(); i++) { + // Check if queue supports graphics + if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) { + graphics_queue_family_index = i; + + // If this queue also supports presentation we are finished + if (physical_device.getSurfaceSupportKHR(i, surface)) { + present_queue_family_index = i; + break; + } + } + + // Check if queue supports presentation + if (physical_device.getSurfaceSupportKHR(i, surface)) { + present_queue_family_index = i; + } + } + + if (graphics_queue_family_index == -1 || present_queue_family_index == -1) { + LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues."); + return false; + } + + static constexpr float queue_priorities[] = {1.0f}; + + const std::array layers = {"VK_LAYER_KHRONOS_validation"}; + const std::array queue_infos = { + vk::DeviceQueueCreateInfo{ + .queueFamilyIndex = graphics_queue_family_index, + .queueCount = 1, + .pQueuePriorities = queue_priorities + }, + vk::DeviceQueueCreateInfo{ + .queueFamilyIndex = present_queue_family_index, 
+ .queueCount = 1, + .pQueuePriorities = queue_priorities + } + }; + + vk::DeviceCreateInfo device_info = { + .pNext = &features, // TODO: Change this + .queueCreateInfoCount = 1, + .pQueueCreateInfos = queue_infos.data(), + .enabledExtensionCount = enabled_extension_count, + .ppEnabledExtensionNames = enabled_extensions.data(), + }; + + if (graphics_queue_family_index != present_queue_family_index) { + device_info.queueCreateInfoCount = 2; + } + + // Enable debug layer on debug builds + if (validation_enabled) { + device_info.enabledLayerCount = static_cast(layers.size()); + device_info.ppEnabledLayerNames = layers.data(); + } + + // Create logical device + device = physical_device.createDevice(device_info); + VULKAN_HPP_DEFAULT_DISPATCHER.init(device); + + // Grab the graphics and present queues. + graphics_queue = device.getQueue(graphics_queue_family_index, 0); + present_queue = device.getQueue(present_queue_family_index, 0); + + // Create the VMA allocator + CreateAllocator(); + + return true; +} + +void Instance::CreateAllocator() { + VmaVulkanFunctions functions = { + .vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr, + .vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr + }; + + VmaAllocatorCreateInfo allocator_info = { + .physicalDevice = physical_device, + .device = device, + .pVulkanFunctions = &functions, + .instance = instance, + .vulkanApiVersion = VK_API_VERSION_1_1 + }; + + if (auto result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result); + UNREACHABLE(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h new file mode 100644 index 000000000..d4b47bf99 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -0,0 +1,129 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +/// The global Vulkan instance +class Instance { +public: + Instance(Frontend::EmuWindow& window); + ~Instance(); + + /// Returns true when the format supports the provided feature flags + bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const; + + /// Returns the most compatible format that supports the provided feature flags + vk::Format GetFormatAlternative(vk::Format format) const; + + /// Returns the Vulkan instance + vk::Instance GetInstance() const { + return instance; + } + + /// Returns the Vulkan surface + vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Returns the current physical device + vk::PhysicalDevice GetPhysicalDevice() const { + return physical_device; + } + + /// Returns the Vulkan device + vk::Device GetDevice() const { + return device; + } + + VmaAllocator GetAllocator() const { + return allocator; + } + + /// Retrieve queue information + u32 GetGraphicsQueueFamilyIndex() const { + return graphics_queue_family_index; + } + + u32 GetPresentQueueFamilyIndex() const { + return present_queue_family_index; + } + + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Feature support + bool IsDynamicRenderingSupported() const { + return dynamic_rendering; + } + + bool IsExtendedDynamicStateSupported() const { + // TODO: Enable this when the pipeline builder is confirmed functional + return false; + } + + bool IsPushDescriptorsSupported() const { + return push_descriptors; + } + + /// Returns the vendor ID of the physical device + u32 GetVendorID() const { + return device_properties.vendorID; + } + + /// Returns the device ID of the physical device + u32 GetDeviceID() const { + return 
device_properties.deviceID; + } + + /// Returns the pipeline cache unique identifier + const auto GetPipelineCacheUUID() const { + return device_properties.pipelineCacheUUID; + } + + /// Returns the minimum required alignment for uniforms + vk::DeviceSize UniformMinAlignment() const { + return device_limits.minUniformBufferOffsetAlignment; + } + +private: + bool CreateDevice(bool validation_enabled); + void CreateAllocator(); + +private: + // Queue family indexes + u32 present_queue_family_index = 0; + u32 graphics_queue_family_index = 0; + vk::Queue present_queue, graphics_queue; + + // Core vulkan objects + vk::Device device; + vk::PhysicalDevice physical_device; + vk::Instance instance; + vk::SurfaceKHR surface; + vk::PhysicalDeviceLimits device_limits; + vk::PhysicalDeviceProperties device_properties; + VmaAllocator allocator; + + // Features per vulkan version + bool dynamic_rendering = false; + bool extended_dynamic_state = false; + bool push_descriptors = false; +}; + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 000000000..4b3d9a33d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -0,0 +1,646 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/common_paths.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" + +namespace Vulkan { + +struct Bindings { + std::array bindings; + u32 binding_count; +}; + +constexpr u32 RASTERIZER_SET_COUNT = 4; +constexpr static std::array RASTERIZER_SETS = { + Bindings{ + // Utility set + .bindings = { + vk::DescriptorType::eUniformBuffer, + vk::DescriptorType::eUniformBuffer, + vk::DescriptorType::eUniformTexelBuffer, + vk::DescriptorType::eUniformTexelBuffer, + vk::DescriptorType::eUniformTexelBuffer + }, + .binding_count = 5 + }, + Bindings{ + // Texture set + .bindings = { + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage + }, + .binding_count = 4 + }, + Bindings{ + // Sampler set + .bindings = { + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler + }, + .binding_count = 4 + }, + Bindings { + // Shadow set + .bindings = { + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage + }, + .binding_count = 7 + } +}; + +constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) { + vk::ShaderStageFlags flags; + switch (type) { + case vk::DescriptorType::eSampler: + case vk::DescriptorType::eSampledImage: + case vk::DescriptorType::eUniformTexelBuffer: + case vk::DescriptorType::eStorageImage: + flags = vk::ShaderStageFlagBits::eFragment; + break; + 
case vk::DescriptorType::eUniformBuffer: + case vk::DescriptorType::eUniformBufferDynamic: + flags = vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eCompute; + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown descriptor type!"); + } + + return flags; +} + +u32 AttribBytes(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + return sizeof(float) * attrib.size; + case AttribType::Int: + return sizeof(u32) * attrib.size; + case AttribType::Short: + return sizeof(u16) * attrib.size; + case AttribType::Byte: + case AttribType::Ubyte: + return sizeof(u8) * attrib.size; + } + + // attrib.type is a 3-bit field, so values outside the enum are representable; + // without this fallback, control would flow off the end of a non-void function (UB). + LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!"); + UNREACHABLE(); + + return 0; +} + +vk::Format ToVkAttributeFormat(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + switch (attrib.size) { + case 1: return vk::Format::eR32Sfloat; + case 2: return vk::Format::eR32G32Sfloat; + case 3: return vk::Format::eR32G32B32Sfloat; + case 4: return vk::Format::eR32G32B32A32Sfloat; + } + default: + LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!"); + UNREACHABLE(); + } + + return vk::Format::eR32Sfloat; +} + +vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { + switch (index) { + case 0: return vk::ShaderStageFlagBits::eVertex; + case 1: return vk::ShaderStageFlagBits::eFragment; + case 2: return vk::ShaderStageFlagBits::eGeometry; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!"); + UNREACHABLE(); + } + + return vk::ShaderStageFlagBits::eVertex; +} + +PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { + descriptor_dirty.fill(true); + + LoadDiskCache(); +} + +PipelineCache::~PipelineCache() { + vk::Device device = instance.GetDevice(); + + SaveDiskCache(); + + device.destroyPipelineLayout(layout); + for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS;
i++) { + device.destroyDescriptorSetLayout(descriptor_set_layouts[i]); + device.destroyDescriptorUpdateTemplate(update_templates[i]); + } + + for (const auto& [hash, pipeline] : graphics_pipelines) { + device.destroyPipeline(pipeline); + } + + graphics_pipelines.clear(); +} + +void PipelineCache::BindPipeline(const PipelineInfo& info) { + ApplyDynamic(info); + + u64 shader_hash = 0; + for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { + shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); + } + + const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ? + offsetof(PipelineInfo, rasterization) : + offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference); + + u64 info_hash = Common::ComputeHash64(&info, info_hash_size); + u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); + + auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{}); + if (new_pipeline) { + it->second = BuildPipeline(info); + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second); + + BindDescriptorSets(); +} + +bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) { + const PicaVSConfig config{regs.vs, setup}; + auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex, + instance.GetDevice(), ShaderOptimization::Debug); + if (!handle) { + return false; + } + + current_shaders[ProgramType::VS] = handle; + shader_hashes[ProgramType::VS] = config.Hash(); + return true; +} + +void PipelineCache::UseTrivialVertexShader() { + current_shaders[ProgramType::VS] = trivial_vertex_shader; + shader_hashes[ProgramType::VS] = 0; +} + +void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { + const PicaFixedGSConfig gs_config{regs}; + auto [handle, _] = fixed_geometry_shaders.Get(gs_config, 
vk::ShaderStageFlagBits::eGeometry, + instance.GetDevice(), ShaderOptimization::Debug); + current_shaders[ProgramType::GS] = handle; + shader_hashes[ProgramType::GS] = gs_config.Hash(); +} + +void PipelineCache::UseTrivialGeometryShader() { + current_shaders[ProgramType::GS] = VK_NULL_HANDLE; + shader_hashes[ProgramType::GS] = 0; +} + +void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { + const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); + auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment, + instance.GetDevice(), ShaderOptimization::Debug); + current_shaders[ProgramType::FS] = handle; + shader_hashes[ProgramType::FS] = config.Hash(); +} + +void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_view) { + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .imageView = image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + } + }; + + SetBinding(set, descriptor, data); +} + +void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 offset, u32 size) { + const DescriptorData data = { + .buffer_info = vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size + } + }; + + SetBinding(set, descriptor, data); +} + +void PipelineCache::BindTexelBuffer(u32 set, u32 descriptor, vk::BufferView buffer_view) { + const DescriptorData data = { + .buffer_view = buffer_view + }; + + SetBinding(set, descriptor, data); +} + +void PipelineCache::BindSampler(u32 set, u32 descriptor, vk::Sampler sampler) { + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .sampler = sampler + } + }; + + SetBinding(set, descriptor, data); +} + +void PipelineCache::SetViewport(float x, float y, float width, float height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f}); +} + +void PipelineCache::SetScissor(s32 x, s32 
y, u32 width, u32 height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}}); +} + +void PipelineCache::MarkDescriptorSetsDirty() { + descriptor_dirty.fill(true); +} + +void PipelineCache::ApplyDynamic(const PipelineInfo& info) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_compare_mask); + command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_write_mask); + command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, info.depth_stencil.stencil_reference); + + if (instance.IsExtendedDynamicStateSupported()) { + command_buffer.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode)); + command_buffer.setDepthCompareOpEXT(PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op)); + command_buffer.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable); + command_buffer.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable); + command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode)); + command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology)); + command_buffer.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable); + command_buffer.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)); + } +} + +void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) { + if (update_data[set][binding] != data) { + update_data[set][binding] = data; + descriptor_dirty[set] = true; + } +} + +void PipelineCache::BuildLayout() { + std::array 
set_bindings; + std::array update_entries; + + vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { + const auto& set = RASTERIZER_SETS[i]; + for (u32 j = 0; j < set.binding_count; j++) { + vk::DescriptorType type = set.bindings[j]; + set_bindings[j] = vk::DescriptorSetLayoutBinding{ + .binding = j, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = ToVkStageFlags(type) + }; + + update_entries[j] = vk::DescriptorUpdateTemplateEntry{ + .dstBinding = j, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = j * sizeof(DescriptorData), + .stride = 0 + }; + } + + const vk::DescriptorSetLayoutCreateInfo layout_info = { + .bindingCount = set.binding_count, + .pBindings = set_bindings.data() + }; + + // Create descriptor set layout + descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info); + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = set.binding_count, + .pDescriptorUpdateEntries = update_entries.data(), + .descriptorSetLayout = descriptor_set_layouts[i] + }; + + // Create descriptor set update template + update_templates[i] = device.createDescriptorUpdateTemplate(template_info); + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = RASTERIZER_SET_COUNT, + .pSetLayouts = descriptor_set_layouts.data(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr + }; + + layout = device.createPipelineLayout(layout_info); +} + +vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { + vk::Device device = instance.GetDevice(); + + u32 shader_count = 0; + std::array shader_stages; + for (std::size_t i = 0; i < current_shaders.size(); i++) { + vk::ShaderModule shader = current_shaders[i]; + if (!shader) { + continue; + } + + // Pack active stages contiguously; shader_count feeds pipeline_info.stageCount below. + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + .stage = ToVkShaderStage(i), + .module = shader, + .pName = "main" + }; + } + + /** + * Vulkan doesn't
intuitively support fixed attributes. To avoid duplicating the data and increasing + * data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate. + * Since one instance is all we render, the shader will always read the single attribute. + */ + std::array bindings; + for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { + const auto& binding = info.vertex_layout.bindings[i]; + bindings[i] = vk::VertexInputBindingDescription{ + .binding = binding.binding, + .stride = binding.stride, + .inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance + : vk::VertexInputRate::eVertex + }; + } + + // Populate vertex attribute structures + std::array attributes; + for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { + const auto& attr = info.vertex_layout.attributes[i]; + attributes[i] = vk::VertexInputAttributeDescription{ + .location = attr.location, + .binding = attr.binding, + .format = ToVkAttributeFormat(attr), + .offset = attr.offset + }; + } + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = info.vertex_layout.binding_count, + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, + .pVertexAttributeDescriptions = attributes.data() + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology), + .primitiveRestartEnable = false + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode), + .frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode), + .depthBiasEnable = false, + .lineWidth = 1.0f + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false + }; + + 
const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = info.blending.blend_enable.Value(), + .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor), + .dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor), + .colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq), + .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor), + .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor), + .alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq), + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = info.blending.logic_op_enable.Value(), + .logicOp = PicaToVK::LogicOp(info.blending.logic_op), + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f} + }; + + const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &placeholder_viewport, + .scissorCount = 1, + .pScissors = &placeholder_scissor, + }; + + const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported(); + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + vk::DynamicState::eLineWidth, + vk::DynamicState::eStencilCompareMask, + vk::DynamicState::eStencilWriteMask, + vk::DynamicState::eStencilReference, + // VK_EXT_extended_dynamic_state + vk::DynamicState::eCullModeEXT, + vk::DynamicState::eDepthCompareOpEXT, + vk::DynamicState::eDepthTestEnableEXT, + vk::DynamicState::eDepthWriteEnableEXT, + vk::DynamicState::eFrontFaceEXT, + 
vk::DynamicState::ePrimitiveTopologyEXT, + vk::DynamicState::eStencilOpEXT, + vk::DynamicState::eStencilTestEnableEXT, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = extended_dynamic_states ? 14u : 6u, + .pDynamicStates = dynamic_states.data() + }; + + const vk::StencilOpState stencil_op_state = { + .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op) + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = static_cast(info.depth_stencil.depth_test_enable.Value()), + .depthWriteEnable = static_cast(info.depth_stencil.depth_write_enable.Value()), + .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op), + .depthBoundsTestEnable = false, + .stencilTestEnable = static_cast(info.depth_stencil.stencil_test_enable.Value()), + .front = stencil_op_state, + .back = stencil_op_state + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = shader_count, + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = layout, + .renderPass = renderpass_cache.GetRenderpass(info.color_attachment, + info.depth_attachment, false) + }; + + if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!"); + UNREACHABLE(); + } + + return VK_NULL_HANDLE; +} + +void 
PipelineCache::BindDescriptorSets() { + vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { + if (descriptor_dirty[i] || !descriptor_sets[i]) { + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = scheduler.GetDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = &descriptor_set_layouts[i] + }; + + vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; + device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i].data()); + + descriptor_sets[i] = set; + descriptor_dirty[i] = false; + } + } + + // Bind the descriptor sets + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT, + descriptor_sets.data(), 0, nullptr); +} + +void PipelineCache::LoadDiskCache() { + const std::string cache_path = + FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin"; + + FileUtil::IOFile cache_file{cache_path, "r"}; + if (!cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "No pipeline cache found"); + // No data on disk, but a valid (empty) pipeline cache handle is still required: + // SaveDiskCache() and BuildPipeline() both use it unconditionally. Without this + // early return we would also read from the unopened file below. + pipeline_cache = instance.GetDevice().createPipelineCache(vk::PipelineCacheCreateInfo{}); + return; + } + + const u32 cache_file_size = cache_file.GetSize(); + auto cache_data = std::vector(cache_file_size); + if (!cache_file.ReadBytes(cache_data.data(), cache_file_size)) { + LOG_WARNING(Render_Vulkan, "Error during pipeline cache read"); + return; + } + + cache_file.Close(); + + const bool is_valid = ValidateData(cache_data.data(), cache_file_size); + const vk::PipelineCacheCreateInfo cache_info = { + .initialDataSize = is_valid ?
cache_file_size : 0, + .pInitialData = cache_data.data() + }; + + vk::Device device = instance.GetDevice(); + pipeline_cache = device.createPipelineCache(cache_info); +} + +void PipelineCache::SaveDiskCache() { + const std::string cache_path = + FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "vulkan" + DIR_SEP "pipelines.bin"; + + FileUtil::IOFile cache_file{cache_path, "w"}; + if (!cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing"); + return; + } + + vk::Device device = instance.GetDevice(); + auto cache_data = device.getPipelineCacheData(pipeline_cache); + if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) { + LOG_WARNING(Render_Vulkan, "Error during pipeline cache write"); + return; + } + + cache_file.Close(); +} + +bool PipelineCache::ValidateData(const u8* data, u32 size) { + if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header"); + return false; + } + + vk::PipelineCacheHeaderVersionOne header; + std::memcpy(&header, data, sizeof(header)); + if (header.headerSize < sizeof(header)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version"); + return false; + } + + if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) { + LOG_ERROR(Render_Vulkan, + "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})", + header.vendorID, vendor_id); + return false; + } + + if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) { + LOG_ERROR(Render_Vulkan, + "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})", + header.deviceID, device_id); + return false; + } + + if (header.pipelineCacheUUID != 
instance.GetPipelineCacheUUID()) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h new file mode 100644 index 000000000..be85f3cfe --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -0,0 +1,248 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/bit_field.h" +#include "common/hash.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/shader/shader_cache.h" +#include "video_core/regs.h" + +namespace Vulkan { + +constexpr u32 MAX_SHADER_STAGES = 3; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_BINDINGS = 16; +constexpr u32 MAX_DESCRIPTORS = 8; +constexpr u32 MAX_DESCRIPTOR_SETS = 6; + +enum class AttribType : u32 { + Float = 0, + Int = 1, + Short = 2, + Byte = 3, + Ubyte = 4 +}; + +/** + * The pipeline state is tightly packed with bitfields to reduce + * the overhead of hashing as much as possible + */ +union RasterizationState { + u8 value = 0; + BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology; + BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode; +}; + +struct DepthStencilState { + union { + u32 value = 0; + BitField<0, 1, u32> depth_test_enable; + BitField<1, 1, u32> depth_write_enable; + BitField<2, 1, u32> stencil_test_enable; + BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op; + BitField<12, 3, 
Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op; + BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op; + }; + + // These are dynamic state so keep them separate + u8 stencil_reference; + u8 stencil_compare_mask; + u8 stencil_write_mask; +}; + +union BlendingState { + u32 value = 0; + BitField<0, 1, u32> blend_enable; + BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor; + BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor; + BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq; + BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor; + BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor; + BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq; + BitField<23, 4, u32> color_write_mask; + BitField<27, 1, u32> logic_op_enable; + BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op; +}; + +union VertexBinding { + u16 value = 0; + BitField<0, 4, u16> binding; + BitField<4, 1, u16> fixed; + BitField<5, 11, u16> stride; +}; + +union VertexAttribute { + u32 value = 0; + BitField<0, 4, u32> binding; + BitField<4, 4, u32> location; + BitField<8, 3, AttribType> type; + BitField<11, 3, u32> size; + BitField<14, 11, u32> offset; +}; + +struct VertexLayout { + u8 binding_count; + u8 attribute_count; + std::array bindings; + std::array attributes; +}; + +/** + * Information about a graphics/compute pipeline + */ +struct PipelineInfo { + VertexLayout vertex_layout{}; + BlendingState blending{}; + VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8; + VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8; + RasterizationState rasterization{}; + DepthStencilState depth_stencil{}; +}; + +union DescriptorData { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + + bool operator!=(const DescriptorData& other) const { + return 
std::memcmp(this, &other, sizeof(DescriptorData)) != 0; + } +}; + +using DescriptorSetData = std::array; + +/** + * Vulkan specialized PICA shader caches + */ +using ProgrammableVertexShaders = + Pica::Shader::ShaderDoubleCache; + +using FixedGeometryShaders = + Pica::Shader::ShaderCache; + +using FragmentShaders = + Pica::Shader::ShaderCache; + + +class Instance; +class TaskScheduler; +class RenderpassCache; + +/** + * Stores a collection of rasterizer pipelines used during rendering. + * In addition handles descriptor set management. + */ +class PipelineCache { +public: + PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache); + ~PipelineCache(); + + /// Binds a pipeline using the provided information + void BindPipeline(const PipelineInfo& info); + + /// Binds a PICA decompiled vertex shader + bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup); + + /// Binds a passthrough vertex shader + void UseTrivialVertexShader(); + + /// Binds a PICA decompiled geometry shader + void UseFixedGeometryShader(const Pica::Regs& regs); + + /// Binds a passthrough geometry shader + void UseTrivialGeometryShader(); + + /// Binds a fragment shader generated from PICA state + void UseFragmentShader(const Pica::Regs& regs); + + /// Binds a texture to the specified descriptor + void BindTexture(u32 set, u32 binding, vk::ImageView view); + + /// Binds a buffer to the specified descriptor + void BindBuffer(u32 set, u32 binding, vk::Buffer buffer, u32 offset, u32 size); + + /// Binds a buffer to the specified descriptor + void BindTexelBuffer(u32 set, u32 binding, vk::BufferView buffer_view); + + /// Binds a sampler to the specified descriptor + void BindSampler(u32 set, u32 binding, vk::Sampler sampler); + + /// Sets the viewport rectangle to the provided values + void SetViewport(float x, float y, float width, float height); + + /// Sets the scissor rectange to the provided values + void 
SetScissor(s32 x, s32 y, u32 width, u32 height); + + /// Marks all descriptor sets as dirty + void MarkDescriptorSetsDirty(); + +private: + /// Binds a resource to the provided binding + void SetBinding(u32 set, u32 binding, DescriptorData data); + + /// Applies dynamic pipeline state to the current command buffer + void ApplyDynamic(const PipelineInfo& info); + + /// Builds the rasterizer pipeline layout + void BuildLayout(); + + /// Builds a rasterizer pipeline using the PipelineInfo struct + vk::Pipeline BuildPipeline(const PipelineInfo& info); + + /// Builds descriptor sets that reference the currently bound resources + void BindDescriptorSets(); + + /// Loads the pipeline cache stored to disk + void LoadDiskCache(); + + /// Stores the generated pipeline cache to disk + void SaveDiskCache(); + + /// Ensures the disk data was generated from the same driver + bool ValidateData(const u8* data, u32 size); + +private: + const Instance& instance; + TaskScheduler& scheduler; + RenderpassCache& renderpass_cache; + + // Cached pipelines + vk::PipelineCache pipeline_cache; + std::unordered_map> graphics_pipelines; + vk::Pipeline current_pipeline{}; + + // Cached layouts for the rasterizer pipelines + vk::PipelineLayout layout; + std::array descriptor_set_layouts; + std::array update_templates; + + // Current data for the descriptor sets + std::array update_data{}; + std::array descriptor_dirty{}; + std::array descriptor_sets; + + // Bound shader modules + enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1 + }; + + std::array current_shaders; + std::array shader_hashes; + ProgrammableVertexShaders programmable_vertex_shaders; + FixedGeometryShaders fixed_geometry_shaders; + FragmentShaders fragment_shaders; + vk::ShaderModule trivial_vertex_shader; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h new file mode 100644 index 000000000..c7d056667 --- /dev/null +++ 
b/src/video_core/renderer_vulkan/vk_platform.h @@ -0,0 +1,130 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +// Include the vulkan platform specific header +#if defined(ANDROID) || defined (__ANDROID__) + #define VK_USE_PLATFORM_ANDROID_KHR 1 +#elif defined(_WIN32) + #define VK_USE_PLATFORM_WIN32_KHR 1 +#elif defined(__APPLE__) + #define VK_USE_PLATFORM_MACOS_MVK 1 + #define VK_USE_PLATFORM_METAL_EXT 1 +#else + #ifdef WAYLAND_DISPLAY + #define VK_USE_PLATFORM_WAYLAND_KHR 1 + #else // wayland + #define VK_USE_PLATFORM_XLIB_KHR 1 + #endif +#endif + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) { + const auto& window_info = emu_window.GetWindowInfo(); + vk::SurfaceKHR surface; + +#if VK_USE_PLATFORM_WIN32_KHR + if (window_info.type == Frontend::WindowSystemType::Windows) { + const vk::Win32SurfaceCreateInfoKHR win32_ci = { + .hinstance = nullptr, + .hwnd = static_cast(window_info.render_surface) + }; + + if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface"); + } + } +#elif VK_USE_PLATFORM_XLIB_KHR + if (window_info.type == Frontend::WindowSystemType::X11) { + const vk::XlibSurfaceCreateInfoKHR xlib_ci{{}, + static_cast(window_info.display_connection), + reinterpret_cast(window_info.render_surface)}; + if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + UNREACHABLE(); + } + } + +#elif VK_USE_PLATFORM_WAYLAND_KHR + if (window_info.type == Frontend::WindowSystemType::Wayland) { + const 
vk::WaylandSurfaceCreateInfoKHR wayland_ci{{}, + static_cast(window_info.display_connection), + static_cast(window_info.render_surface)}; + if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + UNREACHABLE(); + } + } +#endif + + if (!surface) { + LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform"); + } + + return surface; +} + +inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) { + const auto properties = vk::enumerateInstanceExtensionProperties(); + if (properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return std::vector{}; + } + + // Add the windowing system specific extension + std::vector extensions; + extensions.reserve(6); + + switch (window_type) { + case Frontend::WindowSystemType::Headless: + break; +#if VK_USE_PLATFORM_WIN32_KHR + case Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#elif VK_USE_PLATFORM_XLIB_KHR + case Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; +#elif VK_USE_PLATFORM_WAYLAND_KHR + case Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + + if (window_type != Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + + if (enable_debug_utils) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + for (const char* extension : extensions) { + const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) { + return std::strcmp(extension, prop.extensionName) == 0; + }); + + if (iter == properties.end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not 
available", extension); + return std::vector{}; + } + } + + return extensions; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..caf9ac687 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,173 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Vulkan { + +vk::Format ToVkFormatColor(u32 index) { + switch (index) { + case 1: return vk::Format::eR8G8B8A8Unorm; + case 2: return vk::Format::eR8G8B8Unorm; + case 3: return vk::Format::eR5G5B5A1UnormPack16; + case 4: return vk::Format::eR5G6B5UnormPack16; + case 5: return vk::Format::eR4G4B4A4UnormPack16; + default: return vk::Format::eUndefined; + } +} + +vk::Format ToVkFormatDepth(u32 index) { + switch (index) { + case 1: return vk::Format::eD16Unorm; + case 2: return vk::Format::eX8D24UnormPack32; + case 3: return vk::Format::eD24UnormS8Uint; + default: return vk::Format::eUndefined; + } +} + +RenderpassCache::RenderpassCache(const Instance& instance) : instance{instance} { + // Pre-create all needed renderpasses by the renderer + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { + if (color == 0 && depth == 0) { + continue; + } + + const vk::Format color_format = + color == 0 ? vk::Format::eUndefined : instance.GetFormatAlternative(ToVkFormatColor(color)); + const vk::Format depth_stencil_format = + depth == 0 ? 
vk::Format::eUndefined : instance.GetFormatAlternative(ToVkFormatDepth(depth)); + + cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format, + vk::AttachmentLoadOp::eLoad, + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eColorAttachmentOptimal); + cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format, + vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eColorAttachmentOptimal); + } + } +} + +RenderpassCache::~RenderpassCache() { + vk::Device device = instance.GetDevice(); + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { + if (color == 0 && depth == 0) { + continue; + } + + auto& load_pass = cached_renderpasses[color][depth][0]; + auto& clear_pass = cached_renderpasses[color][depth][1]; + + // Destroy renderpasses + device.destroyRenderPass(load_pass); + device.destroyRenderPass(clear_pass); + } + } + + device.destroyRenderPass(present_renderpass); +} + +void RenderpassCache::CreatePresentRenderpass(vk::Format format) { + if (!present_renderpass) { + present_renderpass = CreateRenderPass(format, vk::Format::eUndefined, + vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eUndefined, + vk::ImageLayout::ePresentSrcKHR); + } +} + +vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear) const { + const u32 color_index = + color == VideoCore::PixelFormat::Invalid ? 0 : static_cast(color); + const u32 depth_index = + depth == VideoCore::PixelFormat::Invalid ? 
0 : (static_cast(depth) - 13); + + ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS); + return cached_renderpasses[color_index][depth_index][is_clear]; +} + +vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op, + vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const { + // Define attachments + + u32 attachment_count = 0; + std::array attachments; + + bool use_color = false; + vk::AttachmentReference color_attachment_ref{}; + bool use_depth = false; + vk::AttachmentReference depth_attachment_ref{}; + + if (color != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = color, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = initial_layout, + .finalLayout = final_layout + }; + + color_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eColorAttachmentOptimal + }; + + use_color = true; + } + + if (depth != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = depth, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eLoad, + .stencilStoreOp = vk::AttachmentStoreOp::eStore, + .initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, + .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + depth_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + use_depth = true; + } + + // We also require only one subpass + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = use_color ? 
1u : 0u, +        .pColorAttachments = &color_attachment_ref, +        .pResolveAttachments = nullptr, +        .pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr +    }; + +    const vk::RenderPassCreateInfo renderpass_info = { +        .attachmentCount = attachment_count, +        .pAttachments = attachments.data(), +        .subpassCount = 1, +        .pSubpasses = &subpass, +        .dependencyCount = 0, +        .pDependencies = nullptr +    }; + +    // Create the renderpass +    vk::Device device = instance.GetDevice(); +    return device.createRenderPass(renderpass_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..5a2cd26f3 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,46 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class Swapchain; + +constexpr u32 MAX_COLOR_FORMATS = 5; +constexpr u32 MAX_DEPTH_FORMATS = 3; + +class RenderpassCache { +public: +    RenderpassCache(const Instance& instance); +    ~RenderpassCache(); + +    /// Creates the renderpass used when rendering to the swapchain +    void CreatePresentRenderpass(vk::Format format); + +    /// Returns the renderpass associated with the color-depth format pair +    vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, +                                 bool is_clear) const; + +    /// Returns the swapchain clear renderpass +    vk::RenderPass GetPresentRenderpass() const { +        return present_renderpass; +    } + +private: +    /// Creates a renderpass configured appropriately and stores it in cached_renderpasses +    vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op, +                                    vk::ImageLayout initial_layout, 
vk::ImageLayout final_layout) const; + +private: +    const Instance& instance; +    vk::RenderPass present_renderpass{}; +    vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2]; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.cpp b/src/video_core/renderer_vulkan/vk_shader.cpp new file mode 100644 index 000000000..156b78d7c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.cpp @@ -0,0 +1,223 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include +#include +#include + +namespace Vulkan { + +constexpr TBuiltInResource DefaultTBuiltInResource = { +    .maxLights = 32, +    .maxClipPlanes = 6, +    .maxTextureUnits = 32, +    .maxTextureCoords = 32, +    .maxVertexAttribs = 64, +    .maxVertexUniformComponents = 4096, +    .maxVaryingFloats = 64, +    .maxVertexTextureImageUnits = 32, +    .maxCombinedTextureImageUnits = 80, +    .maxTextureImageUnits = 32, +    .maxFragmentUniformComponents = 4096, +    .maxDrawBuffers = 32, +    .maxVertexUniformVectors = 128, +    .maxVaryingVectors = 8, +    .maxFragmentUniformVectors = 16, +    .maxVertexOutputVectors = 16, +    .maxFragmentInputVectors = 15, +    .minProgramTexelOffset = -8, +    .maxProgramTexelOffset = 7, +    .maxClipDistances = 8, +    .maxComputeWorkGroupCountX = 65535, +    .maxComputeWorkGroupCountY = 65535, +    .maxComputeWorkGroupCountZ = 65535, +    .maxComputeWorkGroupSizeX = 1024, +    .maxComputeWorkGroupSizeY = 1024, +    .maxComputeWorkGroupSizeZ = 64, +    .maxComputeUniformComponents = 1024, +    .maxComputeTextureImageUnits = 16, +    .maxComputeImageUniforms = 8, +    .maxComputeAtomicCounters = 8, +    .maxComputeAtomicCounterBuffers = 1, +    .maxVaryingComponents = 60, +    .maxVertexOutputComponents = 64, +    .maxGeometryInputComponents = 64, +    .maxGeometryOutputComponents = 
128, + .maxFragmentInputComponents = 128, + .maxImageUnits = 8, + .maxCombinedImageUnitsAndFragmentOutputs = 8, + .maxCombinedShaderOutputResources = 8, + .maxImageSamples = 0, + .maxVertexImageUniforms = 0, + .maxTessControlImageUniforms = 0, + .maxTessEvaluationImageUniforms = 0, + .maxGeometryImageUniforms = 0, + .maxFragmentImageUniforms = 8, + .maxCombinedImageUniforms = 8, + .maxGeometryTextureImageUnits = 16, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxGeometryUniformComponents = 1024, + .maxGeometryVaryingComponents = 64, + .maxTessControlInputComponents = 128, + .maxTessControlOutputComponents = 128, + .maxTessControlTextureImageUnits = 16, + .maxTessControlUniformComponents = 1024, + .maxTessControlTotalOutputComponents = 4096, + .maxTessEvaluationInputComponents = 128, + .maxTessEvaluationOutputComponents = 128, + .maxTessEvaluationTextureImageUnits = 16, + .maxTessEvaluationUniformComponents = 1024, + .maxTessPatchComponents = 120, + .maxPatchVertices = 32, + .maxTessGenLevel = 64, + .maxViewports = 16, + .maxVertexAtomicCounters = 0, + .maxTessControlAtomicCounters = 0, + .maxTessEvaluationAtomicCounters = 0, + .maxGeometryAtomicCounters = 0, + .maxFragmentAtomicCounters = 8, + .maxCombinedAtomicCounters = 8, + .maxAtomicCounterBindings = 1, + .maxVertexAtomicCounterBuffers = 0, + .maxTessControlAtomicCounterBuffers = 0, + .maxTessEvaluationAtomicCounterBuffers = 0, + .maxGeometryAtomicCounterBuffers = 0, + .maxFragmentAtomicCounterBuffers = 1, + .maxCombinedAtomicCounterBuffers = 1, + .maxAtomicCounterBufferSize = 16384, + .maxTransformFeedbackBuffers = 4, + .maxTransformFeedbackInterleavedComponents = 64, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .maxSamples = 4, + .maxMeshOutputVerticesNV = 256, + .maxMeshOutputPrimitivesNV = 512, + .maxMeshWorkGroupSizeX_NV = 32, + .maxMeshWorkGroupSizeY_NV = 1, + .maxMeshWorkGroupSizeZ_NV = 1, + .maxTaskWorkGroupSizeX_NV = 32, + 
.maxTaskWorkGroupSizeY_NV = 1, +    .maxTaskWorkGroupSizeZ_NV = 1, +    .maxMeshViewCountNV = 4, +    .maxDualSourceDrawBuffersEXT = 1, +    .limits = TLimits{ +        .nonInductiveForLoops = 1, +        .whileLoops = 1, +        .doWhileLoops = 1, +        .generalUniformIndexing = 1, +        .generalAttributeMatrixVectorIndexing = 1, +        .generalVaryingIndexing = 1, +        .generalSamplerIndexing = 1, +        .generalVariableIndexing = 1, +        .generalConstantMatrixVectorIndexing = 1, +    } +}; + +EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) { +    switch (stage) { +    case vk::ShaderStageFlagBits::eVertex: +        return EShLanguage::EShLangVertex; +    case vk::ShaderStageFlagBits::eGeometry: +        return EShLanguage::EShLangGeometry; +    case vk::ShaderStageFlagBits::eFragment: +        return EShLanguage::EShLangFragment; +    case vk::ShaderStageFlagBits::eCompute: +        return EShLanguage::EShLangCompute; +    default: +        LOG_CRITICAL(Render_Vulkan, "Unknown shader stage"); +        UNREACHABLE(); +    } + +    return EShLanguage::EShLangVertex; +} + +bool InitializeCompiler() { +    static bool glslang_initialized = false; + +    if (glslang_initialized) { +        return true; +    } + +    if (!glslang::InitializeProcess()) { +        LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler"); +        return false; +    } + +    std::atexit([]() { glslang::FinalizeProcess(); }); + +    glslang_initialized = true; +    return true; +} + +vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device, +                         ShaderOptimization level) { +    if (!InitializeCompiler()) { +        return VK_NULL_HANDLE; +    } + +    EProfile profile = ECoreProfile; +    EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); +    EShLanguage lang = ToEshShaderStage(stage); + +    int default_version = 450; +    const char* pass_source_code = code.data(); +    int pass_source_code_length = static_cast(code.size()); + +    auto shader = std::make_unique(lang); +    shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3); + 
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); + + glslang::TShader::ForbidIncluder includer; + if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) { + LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); + return VK_NULL_HANDLE; + } + + // Even though there's only a single shader, we still need to link it to generate SPV + auto program = std::make_unique(); + program->addShader(shader.get()); + if (!program->link(messages)) { + LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); + return VK_NULL_HANDLE; + } + + glslang::TIntermediate* intermediate = program->getIntermediate(lang); + std::vector out_code; + spv::SpvBuildLogger logger; + glslang::SpvOptions options; + + // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc. + if (level == ShaderOptimization::Debug) { + intermediate->addSourceText(pass_source_code, pass_source_code_length); + options.generateDebugInfo = true; + options.disableOptimizer = true; + options.optimizeSize = false; + options.disassemble = false; + options.validate = true; + } else { + options.disableOptimizer = false; + options.stripDebugInfo = true; + } + + glslang::GlslangToSpv(*intermediate, out_code, &logger, &options); + + const std::string spv_messages = logger.getAllMessages(); + if (!spv_messages.empty()) { + LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages); + } + + const vk::ShaderModuleCreateInfo shader_info = { + .codeSize = out_code.size() * sizeof(u32), + .pCode = out_code.data() + }; + + return device.createShaderModule(shader_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.h b/src/video_core/renderer_vulkan/vk_shader.h new file mode 100644 index 000000000..14d225d88 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.h @@ -0,0 +1,19 
@@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +enum class ShaderOptimization { + High = 0, + Debug = 1 +}; + +vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, + vk::Device device, ShaderOptimization level); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp new file mode 100644 index 000000000..3c96ee08f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -0,0 +1,1753 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/bit_set.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "video_core/pica_state.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/video_core.h" + +using Pica::FramebufferRegs; +using Pica::LightingRegs; +using Pica::RasterizerRegs; +using Pica::TexturingRegs; +using TevStageConfig = TexturingRegs::TevStageConfig; +using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; + +namespace Vulkan { + +constexpr std::string_view UniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 + +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; + +layout (std140) uniform shader_data { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int 
proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec4 clip_coef; +}; +)"; + +static std::string GetVertexInterfaceDeclaration(bool is_output) { + std::string out; + + const auto append_variable = [&](std::string_view var, int location) { + out += fmt::format("layout (location={}) ", location); + out += fmt::format("{}{};\n", is_output ? "out " : "in ", var); + }; + + append_variable("vec4 primary_color", ATTRIBUTE_COLOR); + append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0); + append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1); + append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2); + append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W); + append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); + append_variable("vec3 view", ATTRIBUTE_VIEW); + + if (is_output) { + // gl_PerVertex redeclaration is required for separate shader object + out += R"( +out gl_PerVertex { + vec4 gl_Position; +#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) + float gl_ClipDistance[2]; +#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) +}; +)"; + } + + return out; +} + +PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { + PicaFSConfig res{}; + + auto& state = res.state; + + state.scissor_test_mode = regs.rasterizer.scissor_test.mode; + + state.depthmap_enable = regs.rasterizer.depthmap_enable; + + state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable + ? 
regs.framebuffer.output_merger.alpha_test.func.Value() + : FramebufferRegs::CompareFunc::Always; + + state.texture0_type = regs.texturing.texture0.type; + + state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; + + state.alphablend_enable = {}; + state.logic_op = {}; + + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. + const auto& tev_stages = regs.texturing.GetTevStages(); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); + for (std::size_t i = 0; i < tev_stages.size(); i++) { + const auto& tev_stage = tev_stages[i]; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; + } + + state.fog_mode = regs.texturing.fog_mode; + state.fog_flip = regs.texturing.fog_flip != 0; + + state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() + << 4; + + // Fragment lighting + + state.lighting.enable = !regs.lighting.disable; + state.lighting.src_num = regs.lighting.max_light_index + 1; + + for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { + unsigned num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + state.lighting.light[light_index].num = num; + state.lighting.light[light_index].directional = light.config.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; + state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; + state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; + state.lighting.light[light_index].dist_atten_enable = + 
!regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].spot_atten_enable = + !regs.lighting.IsSpotAttenDisabled(num); + state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num); + } + + state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; + state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; + state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + // this is a dummy field due to lack of the corresponding register + state.lighting.lut_sp.enable = true; + state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; + state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); + state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); + + state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; + state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; + state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; + state.lighting.lut_rg.abs_input = 
regs.lighting.abs_lut_input.disable_rg == 0; + state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; + state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + state.lighting.config = regs.lighting.config0.config; + state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; + state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; + state.lighting.bump_mode = regs.lighting.config0.bump_mode; + state.lighting.bump_selector = regs.lighting.config0.bump_selector; + state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; + + state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; + state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; + state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; + state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; + state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; + state.lighting.shadow_selector = regs.lighting.config0.shadow_selector; + + state.proctex.enable = regs.texturing.main_config.texture3_enable; + if (state.proctex.enable) { + state.proctex.coord = regs.texturing.main_config.texture3_coordinates; + state.proctex.u_clamp = regs.texturing.proctex.u_clamp; + state.proctex.v_clamp = regs.texturing.proctex.v_clamp; + state.proctex.color_combiner = regs.texturing.proctex.color_combiner; + state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; + state.proctex.separate_alpha = 
regs.texturing.proctex.separate_alpha; + state.proctex.noise_enable = regs.texturing.proctex.noise_enable; + state.proctex.u_shift = regs.texturing.proctex.u_shift; + state.proctex.v_shift = regs.texturing.proctex.v_shift; + state.proctex.lut_width = regs.texturing.proctex_lut.width; + state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; + state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; + state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; + state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; + state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; + state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; + state.proctex.lut_filter = regs.texturing.proctex_lut.filter; + } + + state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + FramebufferRegs::FragmentOperationMode::Shadow; + + state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; + + return res; +} + +void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + swizzle_hash = setup.GetSwizzleDataHash(); + main_offset = regs.main_offset; + sanitize_mul = VideoCore::g_hw_shader_accurate_mul; + + num_outputs = 0; + output_map.fill(16); + + for (int reg : Common::BitSet(regs.output_mask)) { + output_map[reg] = num_outputs++; + } +} + +void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { + vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); + gs_output_attributes = vs_output_attributes; + + semantic_maps.fill({16, 0}); + for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { + const std::array semantics{ + regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), + }; + 
for (u32 comp = 0; comp < 4; ++comp) { + const auto semantic = semantics[comp]; + if (static_cast(semantic) < 24) { + semantic_maps[static_cast(semantic)] = {attrib, comp}; + } else if (semantic != VSOutputAttributes::INVALID) { + LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); + } + } + } +} + +/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) +static bool IsPassThroughTevStage(const TevStageConfig& stage) { + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); +} + +static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { + const auto& state = config.state; + switch (texture_unit) { + case 0: + // Only unit 0 respects the texturing type + switch (state.texture0_type) { + case TexturingRegs::TextureConfig::Texture2D: + return "textureLod(sampler2D(tex0, tex0_sampler), texcoord0, getLod(texcoord0 * vec2(textureSize(sampler2D(tex0, tex0_sampler), 0))))"; + case TexturingRegs::TextureConfig::Projection2D: + // TODO (wwylele): find the exact LOD formula for projection texture + return "textureProj(sampler2D(tex0, tex0_sampler), vec3(texcoord0, texcoord0_w))"; + case TexturingRegs::TextureConfig::TextureCube: + return "texture(samplerCube(tex_cube, tex_cube_sampler), vec3(texcoord0, texcoord0_w))"; + case TexturingRegs::TextureConfig::Shadow2D: + return "shadowTexture(texcoord0, texcoord0_w)"; + case TexturingRegs::TextureConfig::ShadowCube: + return "shadowTextureCube(texcoord0, texcoord0_w)"; + case TexturingRegs::TextureConfig::Disabled: + return "vec4(0.0)"; + default: 
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); + UNIMPLEMENTED(); + return "texture(sampler2D(tex0, tex0_sampler), texcoord0)"; + } + case 1: + return "textureLod(sampler2D(tex1, tex1_sampler), texcoord1, getLod(texcoord1 * vec2(textureSize(sampler2D(tex1, tex1_sampler), 0))))"; + case 2: + if (state.texture2_use_coord1) + return "textureLod(sampler2D(tex2, tex2_sampler), texcoord1, getLod(texcoord1 * vec2(textureSize(sampler2D(tex2, tex2_sampler), 0))))"; + else + return "textureLod(sampler2D(tex2, tex2_sampler), texcoord2, getLod(texcoord2 * vec2(textureSize(sampler2D(tex2, tex2_sampler), 0))))"; + case 3: + if (state.proctex.enable) { + return "ProcTex()"; + } else { + LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); + return "vec4(0.0)"; + } + default: + UNREACHABLE(); + return ""; + } +} + +/// Writes the specified TEV stage source component(s) +static void AppendSource(std::string& out, const PicaFSConfig& config, + TevStageConfig::Source source, std::string_view index_name) { + using Source = TevStageConfig::Source; + switch (source) { + case Source::PrimaryColor: + out += "rounded_primary_color"; + break; + case Source::PrimaryFragmentColor: + out += "primary_fragment_color"; + break; + case Source::SecondaryFragmentColor: + out += "secondary_fragment_color"; + break; + case Source::Texture0: + out += SampleTexture(config, 0); + break; + case Source::Texture1: + out += SampleTexture(config, 1); + break; + case Source::Texture2: + out += SampleTexture(config, 2); + break; + case Source::Texture3: + out += SampleTexture(config, 3); + break; + case Source::PreviousBuffer: + out += "combiner_buffer"; + break; + case Source::Constant: + out += "const_color["; + out += index_name; + out += ']'; + break; + case Source::Previous: + out += "last_tex_env_out"; + break; + default: + out += "vec4(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source); + break; + } +} + +/// Writes the color components to use for 
the specified TEV stage color modifier +static void AppendColorModifier(std::string& out, const PicaFSConfig& config, + TevStageConfig::ColorModifier modifier, + TevStageConfig::Source source, std::string_view index_name) { + using ColorModifier = TevStageConfig::ColorModifier; + switch (modifier) { + case ColorModifier::SourceColor: + AppendSource(out, config, source, index_name); + out += ".rgb"; + break; + case ColorModifier::OneMinusSourceColor: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".rgb"; + break; + case ColorModifier::SourceAlpha: + AppendSource(out, config, source, index_name); + out += ".aaa"; + break; + case ColorModifier::OneMinusSourceAlpha: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".aaa"; + break; + case ColorModifier::SourceRed: + AppendSource(out, config, source, index_name); + out += ".rrr"; + break; + case ColorModifier::OneMinusSourceRed: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".rrr"; + break; + case ColorModifier::SourceGreen: + AppendSource(out, config, source, index_name); + out += ".ggg"; + break; + case ColorModifier::OneMinusSourceGreen: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".ggg"; + break; + case ColorModifier::SourceBlue: + AppendSource(out, config, source, index_name); + out += ".bbb"; + break; + case ColorModifier::OneMinusSourceBlue: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".bbb"; + break; + default: + out += "vec3(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier); + break; + } +} + +/// Writes the alpha component to use for the specified TEV stage alpha modifier +static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, + TevStageConfig::AlphaModifier modifier, + TevStageConfig::Source source, const std::string& index_name) { + using AlphaModifier = 
TevStageConfig::AlphaModifier; + switch (modifier) { + case AlphaModifier::SourceAlpha: + AppendSource(out, config, source, index_name); + out += ".a"; + break; + case AlphaModifier::OneMinusSourceAlpha: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".a"; + break; + case AlphaModifier::SourceRed: + AppendSource(out, config, source, index_name); + out += ".r"; + break; + case AlphaModifier::OneMinusSourceRed: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".r"; + break; + case AlphaModifier::SourceGreen: + AppendSource(out, config, source, index_name); + out += ".g"; + break; + case AlphaModifier::OneMinusSourceGreen: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".g"; + break; + case AlphaModifier::SourceBlue: + AppendSource(out, config, source, index_name); + out += ".b"; + break; + case AlphaModifier::OneMinusSourceBlue: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".b"; + break; + default: + out += "0.0"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier); + break; + } +} + +/// Writes the combiner function for the color components for the specified TEV stage operation +static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, + std::string_view variable_name) { + out += "clamp("; + using Operation = TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + out += fmt::format("{}[0]", variable_name); + break; + case Operation::Modulate: + out += fmt::format("{0}[0] * {0}[1]", variable_name); + break; + case Operation::Add: + out += fmt::format("{0}[0] + {0}[1]", variable_name); + break; + case Operation::AddSigned: + out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name); + break; + case Operation::Lerp: + out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name); + break; + case Operation::Subtract: + out += fmt::format("{0}[0] - 
{0}[1]", variable_name); + break; + case Operation::MultiplyThenAdd: + out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); + break; + case Operation::AddThenMultiply: + out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name); + break; + case Operation::Dot3_RGB: + case Operation::Dot3_RGBA: + out += + fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name); + break; + default: + out += "vec3(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation); + break; + } + out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0 +} + +/// Writes the combiner function for the alpha component for the specified TEV stage operation +static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, + std::string_view variable_name) { + out += "clamp("; + using Operation = TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + out += fmt::format("{}[0]", variable_name); + break; + case Operation::Modulate: + out += fmt::format("{0}[0] * {0}[1]", variable_name); + break; + case Operation::Add: + out += fmt::format("{0}[0] + {0}[1]", variable_name); + break; + case Operation::AddSigned: + out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name); + break; + case Operation::Lerp: + out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name); + break; + case Operation::Subtract: + out += fmt::format("{0}[0] - {0}[1]", variable_name); + break; + case Operation::MultiplyThenAdd: + out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); + break; + case Operation::AddThenMultiply: + out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name); + break; + default: + out += "0.0"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation); + break; + } + out += ", 0.0, 1.0)"; +} + +/// Writes the if-statement condition used to evaluate alpha testing +static void 
AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { + using CompareFunc = FramebufferRegs::CompareFunc; + switch (func) { + case CompareFunc::Never: + out += "true"; + break; + case CompareFunc::Always: + out += "false"; + break; + case CompareFunc::Equal: + case CompareFunc::NotEqual: + case CompareFunc::LessThan: + case CompareFunc::LessThanOrEqual: + case CompareFunc::GreaterThan: + case CompareFunc::GreaterThanOrEqual: { + static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; + const auto index = static_cast(func) - static_cast(CompareFunc::Equal); + out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); + break; + } + + default: + out += "false"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func); + break; + } +} + +/// Writes the code to emulate the specified TEV stage +static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { + const auto stage = + static_cast(config.state.tev_stages[index]); + if (!IsPassThroughTevStage(stage)) { + const std::string index_name = std::to_string(index); + + out += fmt::format("vec3 color_results_{}_1 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); + out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); + out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); + out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, " + "color_results_{}_2, color_results_{}_3);\n", + index_name, index_name, index_name, index_name); + + // Round the output of each TEV stage to maintain the PICA's 8 bits of precision + out += fmt::format("vec3 color_output_{} = byteround(", index_name); + AppendColorCombiner(out, stage.color_op, 
"color_results_" + index_name); + out += ");\n"; + + if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name); + } else { + out += fmt::format("float alpha_results_{}[3] = float[3](", index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, + index_name); + out += ");\n"; + + out += fmt::format("float alpha_output_{} = byteround(", index_name); + AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); + out += ");\n"; + } + + out += fmt::format("last_tex_env_out = vec4(" + "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " + "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", + index_name, stage.GetColorMultiplier(), index_name, + stage.GetAlphaMultiplier()); + } + + out += "combiner_buffer = next_combiner_buffer;\n"; + + if (config.TevStageUpdatesCombinerBufferColor(index)) + out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n"; + + if (config.TevStageUpdatesCombinerBufferAlpha(index)) + out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; +} + +/// Writes the code to emulate fragment lighting +static void WriteLighting(std::string& out, const PicaFSConfig& config) { + const auto& lighting = config.state.lighting; + + // Define lighting globals + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 light_vector = vec3(0.0);\n" + "vec3 refl_value = vec3(0.0);\n" + "vec3 spot_dir = vec3(0.0);\n" + "vec3 half_vector = vec3(0.0);\n" + "float dot_product = 0.0;\n" + "float clamp_highlights = 1.0;\n" + "float geo_factor = 1.0;\n"; + + // Compute fragment normals and 
tangents + const auto Perturbation = [&] { + return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); + }; + if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { + // Bump mapping is enabled using a normal map + out += fmt::format("vec3 surface_normal = {};\n", Perturbation()); + + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher + // precision result + if (lighting.bump_renorm) { + constexpr std::string_view val = + "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); + } + + // The tangent vector is not perturbed by the normal map and is just a unit vector. + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { + // Bump mapping is enabled using a tangent map + out += fmt::format("vec3 surface_tangent = {};\n", Perturbation()); + // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant + // computation below, which is also confirmed on 3DS. So we don't bother recomputing here + // even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + } else { + // No bump mapping - surface local normal and tangent are just unit vectors + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" + "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to + // eyespace. 
+ out += "vec4 normalized_normquat = normalize(normquat);\n" + "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" + "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; + + if (lighting.enable_shadow) { + std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); + if (lighting.shadow_invert) { + out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); + } else { + out += fmt::format("vec4 shadow = {};\n", shadow_texture); + } + } else { + out += "vec4 shadow = vec4(1.0);\n"; + } + + // Samples the specified lookup table for specular lighting + auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, + LightingRegs::LightingLutInput input, bool abs) { + std::string index; + switch (input) { + case LightingRegs::LightingLutInput::NH: + index = "dot(normal, normalize(half_vector))"; + break; + + case LightingRegs::LightingLutInput::VH: + index = "dot(normalize(view), normalize(half_vector))"; + break; + + case LightingRegs::LightingLutInput::NV: + index = "dot(normal, normalize(view))"; + break; + + case LightingRegs::LightingLutInput::LN: + index = "dot(light_vector, normal)"; + break; + + case LightingRegs::LightingLutInput::SP: + index = "dot(light_vector, spot_dir)"; + break; + + case LightingRegs::LightingLutInput::CP: + // CP input is only available with configuration 7 + if (lighting.config == LightingRegs::LightingConfig::Config7) { + // Note: even if the normal vector is modified by normal map, which is not the + // normal of the tangent plane anymore, the half angle vector is still projected + // using the modified normal vector. + constexpr std::string_view half_angle_proj = + "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; + // Note: the half angle vector projection is confirmed not normalized before the dot + // product. The result is in fact not cos(phi) as the name suggested. 
+ index = fmt::format("dot({}, tangent)", half_angle_proj); + } else { + index = "0.0"; + } + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input); + UNIMPLEMENTED(); + index = "0.0"; + break; + } + + const auto sampler_index = static_cast(sampler); + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = lighting.light[light_num].two_sided_diffuse + ? fmt::format("abs({})", index) + : fmt::format("max({}, 0.0)", index); + return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); + } else { + // LUT index is in the range of (-1.0, 1.0) + return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); + } + }; + + // Write the code to emulate each enabled light + for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.light[light_index]; + const std::string light_src = fmt::format("light_src[{}]", light_config.num); + + // Compute light vector (directional or positional) + if (light_config.directional) { + out += fmt::format("light_vector = normalize({}.position);\n", light_src); + } else { + out += fmt::format("light_vector = normalize({}.position + view);\n", light_src); + } + + out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); + out += "half_vector = normalize(view) + light_vector;\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or + // two-sided + out += std::string("dot_product = ") + (light_config.two_sided_diffuse + ? 
"abs(dot(light_vector, normal));\n" + : "max(dot(light_vector, normal), 0.0);\n"); + + // If enabled, clamp specular component if lighting result is zero + if (lighting.clamp_highlights) { + out += "clamp_highlights = sign(dot_product);\n"; + } + + // If enabled, compute spot light attenuation value + std::string spot_atten = "1.0"; + if (light_config.spot_atten_enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + const std::string value = + GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), + light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); + spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); + } + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enable) { + const std::string index = fmt::format("clamp({}.dist_atten_scale * length(-view - " + "{}.position) + {}.dist_atten_bias, 0.0, 1.0)", + light_src, light_src, light_src); + const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); + dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); + } + + if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { + out += "geo_factor = dot(half_vector, half_vector);\n" + "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" + "dot_product / geo_factor, 1.0);\n"; + } + + // Specular 0 component + std::string d0_lut_value = "1.0"; + if (lighting.lut_d0.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value + const std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, + lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); + } + std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); + if (light_config.geometric_factor_0) { + specular_0 = fmt::format("({} * geo_factor)", specular_0); + } + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.lut_rr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectRed)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, + lighting.lut_rr.type, lighting.lut_rr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); + out += fmt::format("refl_value.r = {};\n", value); + } else { + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.lut_rg.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectGreen)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, + lighting.lut_rg.type, lighting.lut_rg.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); + out += fmt::format("refl_value.g = {};\n", value); + } else { + out += "refl_value.g = refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.lut_rb.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + 
LightingRegs::LightingSampler::ReflectBlue)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, + lighting.lut_rb.type, lighting.lut_rb.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); + out += fmt::format("refl_value.b = {};\n", value); + } else { + out += "refl_value.b = refl_value.r;\n"; + } + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (lighting.lut_d1.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + const std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, + lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); + } + std::string specular_1 = + fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); + if (light_config.geometric_factor_1) { + specular_1 = fmt::format("({} * geo_factor)", specular_1); + } + + // Fresnel + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string value = + GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, + lighting.lut_fr.type, lighting.lut_fr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); + + // Enabled for diffuse lighting alpha component + if (lighting.enable_primary_alpha) { + out += fmt::format("diffuse_sum.a = {};\n", value); + } + + // Enabled for the specular lighting alpha component + if (lighting.enable_secondary_alpha) { + out += fmt::format("specular_sum.a = {};\n", value); + } + } + + bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable; + bool 
shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable; + std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; + std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; + + // Compute primary fragment color (diffuse lighting) function + out += fmt::format( + "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", + light_src, light_src, dist_atten, spot_atten, shadow_primary); + + // Compute secondary fragment color (specular lighting) function + out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", + specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); + } + + // Apply shadow attenuation to alpha components if enabled + if (lighting.shadow_alpha) { + if (lighting.enable_primary_alpha) { + out += "diffuse_sum.a *= shadow.a;\n"; + } + if (lighting.enable_secondary_alpha) { + out += "specular_sum.a *= shadow.a;\n"; + } + } + + // Sum final lighting result + out += "diffuse_sum.rgb += lighting_global_ambient;\n" + "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" + "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; +} + +using ProcTexClamp = TexturingRegs::ProcTexClamp; +using ProcTexShift = TexturingRegs::ProcTexShift; +using ProcTexCombiner = TexturingRegs::ProcTexCombiner; +using ProcTexFilter = TexturingRegs::ProcTexFilter; + +static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, + ProcTexClamp clamp_mode) { + const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 
"1.0" : "0.5"; + switch (mode) { + case ProcTexShift::None: + out += "0.0"; + break; + case ProcTexShift::Odd: + out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); + break; + case ProcTexShift::Even: + out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); + out += "0.0"; + break; + } +} + +static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { + switch (mode) { + case ProcTexClamp::ToZero: + out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); + break; + case ProcTexClamp::ToEdge: + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + case ProcTexClamp::SymmetricalRepeat: + out += fmt::format("{0} = fract({0});\n", var); + break; + case ProcTexClamp::MirroredRepeat: { + out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); + break; + } + case ProcTexClamp::Pulse: + out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + } +} + +static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, + std::string_view offset) { + const auto combined = [combiner]() -> std::string_view { + switch (combiner) { + case ProcTexCombiner::U: + return "u"; + case ProcTexCombiner::U2: + return "(u * u)"; + case TexturingRegs::ProcTexCombiner::V: + return "v"; + case TexturingRegs::ProcTexCombiner::V2: + return "(v * v)"; + case TexturingRegs::ProcTexCombiner::Add: + return "((u + v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::Add2: + return "((u * u + v * v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::SqrtAdd2: + return "min(sqrt(u * u + v * v), 1.0)"; + case TexturingRegs::ProcTexCombiner::Min: + return "min(u, v)"; + case TexturingRegs::ProcTexCombiner::Max: + return "max(u, v)"; + case TexturingRegs::ProcTexCombiner::RMax: + return 
"min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); + return "0.0"; + } + }(); + + out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); +} + +static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { + // LUT sampling uitlity + // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and + // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using + // value entries and difference entries. + out += R"( +float ProcTexLookupLUT(int offset, float coord) { + coord *= 128.0; + float index_i = clamp(floor(coord), 0.0, 127.0); + float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be + // extracted as index_i = 127.0 and index_f = 1.0 + vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; + return clamp(entry.r + entry.g * index_f, 0.0, 1.0); +} + )"; + + // Noise utility + if (config.state.proctex.noise_enable) { + // See swrasterizer/proctex.cpp for more information about these functions + out += R"( +int ProcTexNoiseRand1D(int v) { + const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); + return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; +} + +float ProcTexNoiseRand2D(vec2 point) { + const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); + int u2 = ProcTexNoiseRand1D(int(point.x)); + int v2 = ProcTexNoiseRand1D(int(point.y)); + v2 += ((u2 & 3) == 1) ? 
4 : 0; + v2 ^= (u2 & 1) * 6; + v2 += 10 + u2; + v2 &= 0xF; + v2 ^= table[u2]; + return -1.0 + float(v2) * 2.0/ 15.0; +} + +float ProcTexNoiseCoef(vec2 x) { + vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); + vec2 point = floor(grid); + vec2 frac = grid - point; + + float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); + float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); + float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); + float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); + + float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); + float x0 = mix(g0, g1, x_noise); + float x1 = mix(g2, g3, x_noise); + return mix(x0, x1, y_noise); +} + )"; + } + + out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; + out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); + // Offsets for level 4-7 seem to be hardcoded + out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", + config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, + config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); + out += "int lut_offset = lut_offsets[level];\n"; + // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] + out += "lut_coord *= float(lut_width - 1);\n"; + + switch (config.state.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::LinearMipmapLinear: + case ProcTexFilter::LinearMipmapNearest: + out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; + out += "float lut_index_f = fract(lut_coord);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " + "proctex_lut_offset) + " + "lut_index_f * " + "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; + break; + case ProcTexFilter::Nearest: + case 
ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::NearestMipmapNearest: + out += "lut_coord += float(lut_offset);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; + break; + } + + out += "}\n"; + + out += "vec4 ProcTex() {\n"; + if (config.state.proctex.coord < 3) { + out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord); + } else { + LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); + out += "vec2 uv = abs(texcoord0);\n"; + } + + // This LOD formula is the same as the LOD upper limit defined in OpenGL. + // f(x, y) <= m_u + m_v + m_w + // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) + // Note: this is different from the one normal 2D textures use. + out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; + // unlike normal texture, the bias is inside the log2 + out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", + config.state.proctex.lut_width); + out += "if (proctex_bias == 0.0) lod = 0.0;\n"; + out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", + std::max(0.0f, static_cast(config.state.proctex.lod_min)), + std::min(7.0f, static_cast(config.state.proctex.lod_max))); + // Get shift offset before noise generation + out += "float u_shift = "; + AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, + config.state.proctex.u_clamp); + out += ";\n"; + out += "float v_shift = "; + AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, + config.state.proctex.v_clamp); + out += ";\n"; + + // Generate noise + if (config.state.proctex.noise_enable) { + out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" + "uv = abs(uv);\n"; + } + + // Shift + out += "float u = uv.x + u_shift;\n" + "float v = uv.y + v_shift;\n"; + + // Clamp + AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); + AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); + + // Combine and map + out 
+= "float lut_coord = "; + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, + "proctex_color_map_offset"); + out += ";\n"; + + switch (config.state.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::Nearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; + break; + case ProcTexFilter::NearestMipmapNearest: + case ProcTexFilter::LinearMipmapNearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; + break; + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::LinearMipmapLinear: + out += "int lod_i = int(lod);\n" + "float lod_f = fract(lod);\n" + "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " + "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; + break; + } + + if (config.state.proctex.separate_alpha) { + // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It + // uses the output of CombineAndMap directly instead. + out += "float final_alpha = "; + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, + "proctex_alpha_map_offset"); + out += ";\n"; + out += "return vec4(final_color.xyz, final_alpha);\n}\n"; + } else { + out += "return final_color;\n}\n"; + } +} + +std::string GenerateFragmentShader(const PicaFSConfig& config) { + const auto& state = config.state; + std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; + out += GetVertexInterfaceDeclaration(false); + + out += R"( +in vec4 gl_FragCoord; + +layout (location = 0) out vec4 color; + +layout(set = 0, binding = 2) uniform samplerBuffer texture_buffer_lut_lf; +layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_rg; +layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rgba; + +layout(set = 1, binding = 0) uniform texture2D tex0; +layout(set = 1, binding = 1) uniform texture2D tex1; +layout(set = 1, binding = 2) uniform texture2D tex2; +layout(set = 1, binding = 3) uniform 
textureCube tex_cube; + +layout(set = 2, binding = 0) uniform sampler tex0_sampler; +layout(set = 2, binding = 1) uniform sampler tex1_sampler; +layout(set = 2, binding = 2) uniform sampler tex2_sampler; +layout(set = 2, binding = 3) uniform sampler tex_cube_sampler; + +layout(set = 3, binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; +layout(set = 3, binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; +layout(set = 3, binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; +layout(set = 3, binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; +layout(set = 3, binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; +layout(set = 3, binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; +layout(set = 3, binding = 6, r32ui) uniform uimage2D shadow_buffer; +)"; + + out += UniformBlockDef; + + out += R"( +// Rotate the vector v by the quaternion q +vec3 quaternion_rotate(vec4 q, vec3 v) { + return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); +} + +float LookupLightingLUT(int lut_index, int index, float delta) { + vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; + return entry.r + entry.g * delta; +} + +float LookupLightingLUTUnsigned(int lut_index, float pos) { + int index = clamp(int(pos * 256.0), 0, 255); + float delta = pos * 256.0 - float(index); + return LookupLightingLUT(lut_index, index, delta); +} + +float LookupLightingLUTSigned(int lut_index, float pos) { + int index = clamp(int(pos * 128.0), -128, 127); + float delta = pos * 128.0 - float(index); + if (index < 0) index += 256; + return LookupLightingLUT(lut_index, index, delta); +} + +float byteround(float x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec2 byteround(vec2 x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec3 byteround(vec3 x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec4 byteround(vec4 x) { + return round(x * 255.0) * (1.0 / 255.0); +} + 
+// PICA's LOD formula for 2D textures. +// This LOD formula is the same as the LOD lower limit defined in OpenGL. +// f(x, y) >= max{m_u, m_v, m_w} +// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) +float getLod(vec2 coord) { + vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord))); + return log2(max(d.x, d.y)); +} + +uvec2 DecodeShadow(uint pixel) { + return uvec2(pixel >> 8, pixel & 0xFFu); +} + +uint EncodeShadow(uvec2 pixel) { + return (pixel.x << 8) | pixel.y; +} + +float CompareShadow(uint pixel, uint z) { + uvec2 p = DecodeShadow(pixel); + return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); +} + +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + return 1.0; + return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); +} + +float mix2(vec4 s, vec2 a) { + vec2 t = mix(s.xy, s.zw, a.yy); + return mix(t.x, t.y, a.x); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.state.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; + out += R"( + vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = imageSize(shadow_texture_px); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } +)"; + out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - 
shadow_texture_bias));"; + out += R"( + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + // This part should have been refactored into functions, + // but many drivers don't like passing uimage2D as parameters + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_px, i00).r, + imageLoad(shadow_texture_px, i10).r, + imageLoad(shadow_texture_px, i01).r, + imageLoad(shadow_texture_px, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nx, i00).r, + imageLoad(shadow_texture_nx, i10).r, + imageLoad(shadow_texture_nx, i01).r, + imageLoad(shadow_texture_nx, i11).r); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_py, i00).r, + imageLoad(shadow_texture_py, i10).r, + imageLoad(shadow_texture_py, i01).r, + imageLoad(shadow_texture_py, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_ny, i00).r, + imageLoad(shadow_texture_ny, i10).r, + imageLoad(shadow_texture_ny, i01).r, + imageLoad(shadow_texture_ny, i11).r); + } else { + if (c.z > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_pz, i00).r, + imageLoad(shadow_texture_pz, i10).r, + imageLoad(shadow_texture_pz, i01).r, + imageLoad(shadow_texture_pz, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nz, i00).r, + imageLoad(shadow_texture_nz, i10).r, + imageLoad(shadow_texture_nz, i01).r, + imageLoad(shadow_texture_nz, i11).r); + } + vec4 s = vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} +)"; 
+ + if (config.state.proctex.enable) + AppendProcTexSampler(out, config); + + // We round the interpolated primary color to the nearest 1/255th + // This maintains the PICA's 8 bits of precision + out += R"( +void main() { +vec4 rounded_primary_color = byteround(primary_color); +vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); +)"; + + // Do not do any sort of processing if it's obvious we're not going to pass the alpha test + if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { + out += "discard; }"; + return out; + } + + // Append the scissor test + if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) { + out += "if ("; + // Negate the condition if we have to keep only the pixels outside the scissor box + if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { + out += '!'; + } + out += "(gl_FragCoord.x >= float(scissor_x1) && " + "gl_FragCoord.y >= float(scissor_y1) && " + "gl_FragCoord.x < float(scissor_x2) && " + "gl_FragCoord.y < float(scissor_y2))) discard;\n"; + } + + // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use + // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then + // do our own transformation according to PICA specification. 
+ out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" + "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + + if (state.lighting.enable) + WriteLighting(out, config); + + out += "vec4 combiner_buffer = vec4(0.0);\n" + "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" + "vec4 last_tex_env_out = vec4(0.0);\n"; + + for (std::size_t index = 0; index < state.tev_stages.size(); ++index) { + WriteTevStage(out, config, static_cast(index)); + } + + if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { + out += "if ("; + AppendAlphaTestCondition(out, state.alpha_test_func); + out += ") discard;\n"; + } + + // Append fog combiner + if (state.fog_mode == TexturingRegs::FogMode::Fog) { + // Get index into fog LUT + if (state.fog_flip) { + out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; + } else { + out += "float fog_index = depth * 128.0;\n"; + } + + // Generate clamped fog factor from LUT for given fog index + out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" + "float fog_f = fog_index - fog_i;\n" + "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " + "fog_lut_offset).rg;\n" + "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" + "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; + + // Blend the fog + out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; + } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { + Core::System::GetInstance().TelemetrySession().AddField( + Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); + LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); + out += "discard; }"; + return out; + } + + if (state.shadow_rendering) { + out += R"( +uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); +uint s = uint(last_tex_env_out.g * float(0xFF)); +ivec2 image_coord = 
ivec2(gl_FragCoord.xy); + +uint old = imageLoad(shadow_buffer, image_coord).x; +uint new; +uint old2; +do { + old2 = old; + + uvec2 ref = DecodeShadow(old); + if (d < ref.x) { + if (s == 0u) { + ref.x = d; + } else { + s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); + ref.y = min(s, ref.y); + } + } + new = EncodeShadow(ref); + +} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2); +)"; + } else { + out += "gl_FragDepth = depth;\n"; + // Round the final fragment color to maintain the PICA's 8 bits of precision + out += "color = byteround(last_tex_env_out);\n"; + } + + out += '}'; + return out; +} + +std::string GenerateTrivialVertexShader() { + std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; + out += + fmt::format("layout(location = {}) in vec4 vert_position;\n" + "layout(location = {}) in vec4 vert_color;\n" + "layout(location = {}) in vec2 vert_texcoord0;\n" + "layout(location = {}) in vec2 vert_texcoord1;\n" + "layout(location = {}) in vec2 vert_texcoord2;\n" + "layout(location = {}) in float vert_texcoord0_w;\n" + "layout(location = {}) in vec4 vert_normquat;\n" + "layout(location = {}) in vec3 vert_view;\n", + ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); + + out += GetVertexInterfaceDeclaration(true); + + out += UniformBlockDef; + + out += R"( + +void main() { + primary_color = vert_color; + texcoord0 = vert_texcoord0; + texcoord1 = vert_texcoord1; + texcoord2 = vert_texcoord2; + texcoord0_w = vert_texcoord0_w; + normquat = vert_normquat; + view = vert_view; + gl_Position = vert_position; +#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) + gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 + gl_ClipDistance[1] = dot(clip_coef, vert_position); +#endif // !defined(CITRA_GLES) || 
defined(GL_EXT_clip_cull_distance) +} +)"; + + return out; +} + +std::optional GenerateVertexShader( + const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config) { + /*std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; + out += ShaderDecompiler::GetCommonDeclarations(); + + std::array used_regs{}; + const auto get_input_reg = [&used_regs](u32 reg) { + ASSERT(reg < 16); + used_regs[reg] = true; + return fmt::format("vs_in_reg{}", reg); + }; + + const auto get_output_reg = [&](u32 reg) -> std::string { + ASSERT(reg < 16); + if (config.state.output_map[reg] < config.state.num_outputs) { + return fmt::format("vs_out_attr{}", config.state.output_map[reg]); + } + return ""; + }; + + auto program_source_opt = ShaderDecompiler::DecompileProgram( + setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg, + get_output_reg, config.state.sanitize_mul); + + if (!program_source_opt) + return std::nullopt; + + std::string& program_source = program_source_opt->code; + + out += R"( +#define uniforms vs_uniforms +layout (std140) uniform vs_config { + pica_uniforms uniforms; +}; + +)"; + // input attributes declaration + for (std::size_t i = 0; i < used_regs.size(); ++i) { + if (used_regs[i]) { + out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i); + } + } + out += '\n'; + + // output attributes declaration + for (u32 i = 0; i < config.state.num_outputs; ++i) { + out += "layout(location = " + std::to_string(i) + ") out vec4 vs_out_attr" + std::to_string(i) + ";\n"; + } + + out += "\nvoid main() {\n"; + for (u32 i = 0; i < config.state.num_outputs; ++i) { + out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); + } + out += "\n exec_shader();\n}\n\n"; + + out += program_source; + + return {{std::move(out)}};*/ + return std::nullopt; +} + +static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config) { + /*std::string out = GetVertexInterfaceDeclaration(true, separable_shader); + out 
+= UniformBlockDef; + out += ShaderDecompiler::GetCommonDeclarations(); + + out += '\n'; + for (u32 i = 0; i < config.vs_output_attributes; ++i) { + out += ("layout(location = " + std::to_string(i) + ") in vec4 vs_out_attr" + std::to_string(i) + "[];\n"; + } + + out += R"( +struct Vertex { +)"; + out += fmt::format(" vec4 attributes[{}];\n", config.gs_output_attributes); + out += "};\n\n"; + + const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string { + const u32 slot = static_cast(slot_semantic); + const u32 attrib = config.semantic_maps[slot].attribute_index; + const u32 comp = config.semantic_maps[slot].component_index; + if (attrib < config.gs_output_attributes) { + return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]); + } + return "0.0"; + }; + + out += "vec4 GetVertexQuaternion(Vertex vtx) {\n"; + out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + + semantic(VSOutputAttributes::QUATERNION_Z) + ", " + + semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; + out += "}\n\n"; + + out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n"; + out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + + semantic(VSOutputAttributes::POSITION_Y) + ", " + + semantic(VSOutputAttributes::POSITION_Z) + ", " + + semantic(VSOutputAttributes::POSITION_W) + ");\n"; + out += " gl_Position = vtx_pos;\n"; + out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; + out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 + out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; + out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; + + out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n"; + out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n"; + + out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + + 
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + + ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; + out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; + + out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; + out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; + + out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; + out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + + ");\n\n"; + + out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; + + out += " EmitVertex();\n"; + out += "}\n"; + + out += R"( +bool AreQuaternionsOpposite(vec4 qa, vec4 qb) { + return (dot(qa, qb) < 0.0); +} + +void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { + EmitVtx(vtx0, false); + EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1))); + EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2))); + EndPrimitive(); +} +)"; + + return out;*/ + return ""; +}; + +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) { + std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; + + out += R"( +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +)"; + + out += GetGSCommonSource(config.state); + + out += R"( +void main() { + Vertex prim_buffer[3]; +)"; + for (u32 vtx = 0; vtx < 3; ++vtx) { + out += fmt::format(" prim_buffer[{}].attributes = vec4[{}](", vtx, + config.state.gs_output_attributes); + for (u32 i = 0; i < config.state.vs_output_attributes; ++i) { + out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? 
/// Vertex attribute locations shared between the generated vertex, geometry and
/// fragment shaders. The numeric values are used directly as `layout(location = N)`.
enum Attributes {
    ATTRIBUTE_POSITION,
    ATTRIBUTE_COLOR,
    ATTRIBUTE_TEXCOORD0,
    ATTRIBUTE_TEXCOORD1,
    ATTRIBUTE_TEXCOORD2,
    ATTRIBUTE_TEXCOORD0_W,
    ATTRIBUTE_NORMQUAT,
    ATTRIBUTE_VIEW,
};

// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
    u32 sources_raw;
    u32 modifiers_raw;
    u32 ops_raw;
    u32 scales_raw;
    // Reconstructs a full TevStageConfig from the raw cached words.
    // const_color is zeroed on purpose: it is not part of the shader cache key.
    explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
        Pica::TexturingRegs::TevStageConfig stage;
        stage.sources_raw = sources_raw;
        stage.modifiers_raw = modifiers_raw;
        stage.ops_raw = ops_raw;
        stage.const_color = 0;
        stage.scales_raw = scales_raw;
        return stage;
    }
};
spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; + bool shadow_enable; + } light[8]; + + bool enable; + unsigned src_num; + Pica::LightingRegs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::LightingRegs::LightingConfig config; + bool enable_primary_alpha; + bool enable_secondary_alpha; + + bool enable_shadow; + bool shadow_primary; + bool shadow_secondary; + bool shadow_invert; + bool shadow_alpha; + unsigned shadow_selector; + + struct { + bool enable; + bool abs_input; + Pica::LightingRegs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + struct { + bool enable; + u32 coord; + Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; + Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; + bool separate_alpha; + bool noise_enable; + Pica::TexturingRegs::ProcTexShift u_shift, v_shift; + u32 lut_width; + u32 lut_offset0; + u32 lut_offset1; + u32 lut_offset2; + u32 lut_offset3; + u32 lod_min; + u32 lod_max; + Pica::TexturingRegs::ProcTexFilter lut_filter; + } proctex; + + bool shadow_rendering; + bool shadow_texture_orthographic; +}; + +/** + * This struct contains all state used to generate the GLSL fragment shader that emulates the + * current Pica register configuration. This struct is used as a cache key for generated GLSL shader + * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by + * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where + * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) + * two separate shaders sharing the same key. + */ +struct PicaFSConfig : Common::HashableStruct { + + /// Construct a PicaFSConfig with the given Pica register configuration. 
+ static PicaFSConfig BuildFromRegs(const Pica::Regs& regs); + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); + } +}; + +/** + * This struct contains common information to identify a GL vertex/geometry shader generated from + * PICA vertex/geometry shader. + */ +struct PicaShaderConfigCommon { + void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); + + u64 program_hash; + u64 swizzle_hash; + u32 main_offset; + bool sanitize_mul; + + u32 num_outputs; + + // output_map[output register index] -> output attribute index + std::array output_map; +}; + +/** + * This struct contains information to identify a GL vertex shader generated from PICA vertex + * shader. + */ +struct PicaVSConfig : Common::HashableStruct { + explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + state.Init(regs, setup); + } + explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { + state = conf; + } +}; + +struct PicaGSConfigCommonRaw { + void Init(const Pica::Regs& regs); + + u32 vs_output_attributes; + u32 gs_output_attributes; + + struct SemanticMap { + u32 attribute_index; + u32 component_index; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps; +}; + +/** + * This struct contains information to identify a GL geometry shader generated from PICA no-geometry + * shader pipeline + */ +struct PicaFixedGSConfig : Common::HashableStruct { + explicit PicaFixedGSConfig(const Pica::Regs& regs) { + state.Init(regs); + } +}; + +/** + * Generates the GLSL vertex shader program source code that accepts vertices from software shader + * and directly passes them to the fragment shader. 
+ * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ +std::string GenerateTrivialVertexShader(); + +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code; boost::none on failure + */ +std::optional GenerateVertexShader( + const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config); + +/** + * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline + * @returns String of the shader source code + */ +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the current Pica state + * @param config ShaderCacheKey object generated for the current Pica state, used for the shader + * configuration (NOTE: Use state in this struct only, not the Pica registers!) + * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const PicaFSConfig& config); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..60629ae33 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,241 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) { + std::pair result{}; + switch (usage) { + case vk::BufferUsageFlagBits::eVertexBuffer: + result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead, + vk::PipelineStageFlagBits::eVertexInput); + break; + case vk::BufferUsageFlagBits::eIndexBuffer: + result = std::make_pair(vk::AccessFlagBits::eIndexRead, + vk::PipelineStageFlagBits::eVertexInput); + case vk::BufferUsageFlagBits::eUniformBuffer: + result = std::make_pair(vk::AccessFlagBits::eUniformRead, + vk::PipelineStageFlagBits::eVertexShader | + vk::PipelineStageFlagBits::eGeometryShader | + vk::PipelineStageFlagBits::eFragmentShader); + case vk::BufferUsageFlagBits::eUniformTexelBuffer: + result = std::make_pair(vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eFragmentShader); + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage); + } + + return result; +} + +StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage) + : instance{instance} { + const vk::BufferCreateInfo buffer_info = { + .size = size, + .usage = usage + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST + }; + + VkBuffer unsafe_buffer = VK_NULL_HANDLE; + VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); + + vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, + &unsafe_buffer, &allocation, 
&alloc_info); + + buffer = vk::Buffer{unsafe_buffer}; + mapped = std::span{reinterpret_cast(alloc_info.pMappedData), size}; +} + +StagingBuffer::~StagingBuffer() { + vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); +} + +StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info) + : instance{instance}, scheduler{scheduler}, info{info}, + staging{instance, info.size, vk::BufferUsageFlagBits::eTransferSrc} { + + const vk::BufferCreateInfo buffer_info = { + .size = info.size, + .usage = info.usage | vk::BufferUsageFlagBits::eTransferDst + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE + }; + + VkBuffer unsafe_buffer = VK_NULL_HANDLE; + VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); + + vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, + &unsafe_buffer, &allocation, &alloc_info); + + buffer = vk::Buffer{unsafe_buffer}; + + vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < info.views.size(); i++) { + if (info.views[i] == vk::Format::eUndefined) { + view_count = i; + break; + } + + const vk::BufferViewCreateInfo view_info = { + .buffer = buffer, + .format = info.views[i], + .range = info.size + }; + + views[i] = device.createBufferView(view_info); + } + + available_size = info.size; +} + +StreamBuffer::~StreamBuffer() { + if (buffer) { + vk::Device device = instance.GetDevice(); + vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); + for (u32 i = 0; i < view_count; i++) { + device.destroyBufferView(views[i]); + } + } +} + +std::tuple StreamBuffer::Map(u32 size, u32 alignment) { + ASSERT(size <= info.size && alignment <= info.size); + + if (alignment > 0) { + buffer_offset = Common::AlignUp(buffer_offset, alignment); + } + + // Have we run out of available space? 
+ bool invalidate = false; + if (available_size < size) { + // Flush any pending writes before continuing + Flush(); + + // If we are at the end of the buffer, start over + if (buffer_offset + size > info.size) { + Invalidate(); + invalidate = true; + } + + // Try to garbage collect old regions + if (!UnlockFreeRegions(size)) { + // Nuclear option: stall the GPU to remove all the locks + LOG_WARNING(Render_Vulkan, "Buffer GPU stall"); + Invalidate(); + regions.clear(); + available_size = info.size; + } + } + + u8* mapped = reinterpret_cast(staging.mapped.data() + buffer_offset); + return std::make_tuple(mapped, buffer_offset, invalidate); +} + +void StreamBuffer::Commit(u32 size) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + auto [access_mask, stage_mask] = ToVkAccessStageFlags(info.usage); + const vk::BufferMemoryBarrier buffer_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buffer, + .offset = buffer_offset, + .size = size + }; + + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, + vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); + + + buffer_offset += size; + available_size -= size; +} + +void StreamBuffer::Flush() { + const u32 flush_size = buffer_offset - flush_start; + if (flush_size > 0) { + vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer(); + VmaAllocator allocator = instance.GetAllocator(); + + const u32 flush_size = buffer_offset - flush_start; + const vk::BufferCopy copy_region = { + .srcOffset = flush_start, + .dstOffset = flush_start, + .size = flush_size + }; + + vmaFlushAllocation(allocator, allocation, flush_start, flush_size); + command_buffer.copyBuffer(staging.buffer, buffer, copy_region); + + // Lock the region + const LockedRegion region = { + .size = flush_size, + .fence_counter = 
scheduler.GetFenceCounter() + }; + + regions.emplace(flush_start, region); + flush_start = buffer_offset; + } +} + +void StreamBuffer::Invalidate() { + buffer_offset = 0; + flush_start = 0; +} + +bool StreamBuffer::UnlockFreeRegions(u32 target_size) { + available_size = 0; + + // Free regions that don't need waiting + auto it = regions.lower_bound(buffer_offset); + while (it != regions.end()) { + const auto& [offset, region] = *it; + if (region.fence_counter <= scheduler.GetFenceCounter()) { + available_size += region.size; + it = regions.erase(it); + } + else { + break; + } + } + + // If that wasn't enough, try waiting for some fences + while (available_size < target_size) { + const auto& [offset, region] = *it; + + if (region.fence_counter > scheduler.GetFenceCounter()) { + scheduler.WaitFence(region.fence_counter); + } + + available_size += region.size; + it = regions.erase(it); + } + + return available_size >= target_size; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..bfc4d5556 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,87 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once +#include +#include +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class TaskScheduler; + +constexpr u32 MAX_BUFFER_VIEWS = 3; + +struct BufferInfo { + u32 size = 0; + vk::BufferUsageFlagBits usage{}; + std::array views{}; +}; + +struct LockedRegion { + u32 size = 0; + u64 fence_counter = 0; +}; + +struct StagingBuffer { + StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage); + ~StagingBuffer(); + + const Instance& instance; + vk::Buffer buffer{}; + VmaAllocation allocation{}; + std::span mapped{}; +}; + +class StreamBuffer { +public: + StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info); + ~StreamBuffer(); + + std::tuple Map(u32 size, u32 alignment = 0); + + /// Commits size bytes from the currently mapped staging memory + void Commit(u32 size = 0); + + /// Flushes staging memory to the GPU buffer + void Flush(); + + /// Returns the Vulkan buffer handle + vk::Buffer GetHandle() const { + return buffer; + } + + /// Returns an immutable reference to the requested buffer view + const vk::BufferView& GetView(u32 index = 0) const { + ASSERT(index < view_count); + return views[index]; + } + +private: + /// Invalidates the buffer offsets + void Invalidate(); + + /// Removes the lock on regions whose fence counter has been reached by the GPU + bool UnlockFreeRegions(u32 target_size); + +private: + const Instance& instance; + TaskScheduler& scheduler; + BufferInfo info{}; + StagingBuffer staging; + + vk::Buffer buffer{}; + VmaAllocation allocation{}; + std::array views{}; + u32 view_count = 0; + + u32 buffer_offset = 0; + u32 flush_start = 0; + s32 available_size = 0; + std::map regions; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp new file mode 100644 index 000000000..b872293ef --- /dev/null +++ 
b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -0,0 +1,237 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" + +namespace Vulkan { + +Swapchain::Swapchain(const Instance& instance, CommandScheduler& scheduler, + RenderpassCache& renderpass_cache, vk::SurfaceKHR surface) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, surface{surface} { + + // Set the surface format early for RenderpassCache to create the present renderpass + Configure(0, 0); + renderpass_cache.CreatePresentRenderpass(surface_format.format); +} + +Swapchain::~Swapchain() { + vk::Device device = instance.GetDevice(); + device.destroySemaphore(render_finished); + device.destroySemaphore(image_available); + device.destroySwapchainKHR(swapchain); +} + +void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { + is_outdated = false; + is_suboptimal = false; + + // Fetch information about the provided surface + Configure(width, height); + + const std::array queue_family_indices = { + instance.GetGraphicsQueueFamilyIndex(), + instance.GetPresentQueueFamilyIndex(), + }; + + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; + const u32 queue_family_indices_count = exclusive ? 1u : 2u; + const vk::SharingMode sharing_mode = + exclusive ? 
vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent; + const vk::SwapchainCreateInfoKHR swapchain_info = { + .surface = surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, + .imageSharingMode = sharing_mode, + .queueFamilyIndexCount = queue_family_indices_count, + .pQueueFamilyIndices = queue_family_indices.data(), + .preTransform = transform, + .presentMode = present_mode, + .clipped = true, + .oldSwapchain = swapchain + }; + + vk::Device device = instance.GetDevice(); + vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info); + + // If an old swapchain exists, destroy it and move the new one to its place. + if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) { + device.destroySwapchainKHR(old_swapchain); + } + + // Create sync objects if not already created + if (!image_available) { + image_available = device.createSemaphore({}); + } + + if (!render_finished) { + render_finished = device.createSemaphore({}); + } + + vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass(); + auto images = device.getSwapchainImagesKHR(swapchain); + + // Destroy the previous images + for (auto& image : swapchain_images) { + device.destroyImageView(image.image_view); + device.destroyFramebuffer(image.framebuffer); + } + swapchain_images.clear(); + + std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image { + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = vk::ImageViewType::e2D, + .format = surface_format.format, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }; + + vk::ImageView image_view = device.createImageView(view_info); + const std::array 
attachments{image_view}; + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = present_renderpass, + .attachmentCount = 1, + .pAttachments = attachments.data(), + .width = extent.width, + .height = extent.height, + .layers = 1 + }; + + vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info); + + return Image{ + .image = image, + .image_view = image_view, + .framebuffer = framebuffer + }; + }); +} + +// Wait for maximum of 1 second +constexpr u64 ACQUIRE_TIMEOUT = 1000000000; + +void Swapchain::AcquireNextImage() { + vk::Device device = instance.GetDevice(); + vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE, + ¤t_image); + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + is_suboptimal = true; + break; + case vk::Result::eErrorOutOfDateKHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result"); + break; + } +} + +void Swapchain::Present() { + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &render_finished, + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = ¤t_image + }; + + vk::Queue present_queue = instance.GetPresentQueue(); + vk::Result result = present_queue.presentKHR(present_info); + + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); + break; + case vk::Result::eErrorOutOfDateKHR: + is_outdated = true; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed"); + break; + } + + current_frame = (current_frame + 1) % swapchain_images.size(); +} + +void Swapchain::Configure(u32 width, u32 height) { + vk::PhysicalDevice physical = instance.GetPhysicalDevice(); + + // Choose surface format + auto formats = physical.getSurfaceFormatsKHR(surface); + surface_format = formats[0]; + + if 
(formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { + surface_format.format = vk::Format::eB8G8R8A8Unorm; + } else { + auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool { + return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear && + format.format == vk::Format::eB8G8R8A8Unorm; + }); + + if (it == formats.end()) { + LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); + } else { + surface_format = *it; + } + } + + // Checks if a particular mode is supported, if it is, returns that mode. + auto modes = physical.getSurfacePresentModesKHR(surface); + + // FIFO is guaranteed by the Vulkan standard to be available + present_mode = vk::PresentModeKHR::eFifo; + auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) { + return vk::PresentModeKHR::eMailbox == mode; + }); + + // Prefer Mailbox if present for lowest latency + if (iter != modes.end()) { + present_mode = vk::PresentModeKHR::eMailbox; + } + + // Query surface extent + auto capabilities = physical.getSurfaceCapabilitiesKHR(surface); + extent = capabilities.currentExtent; + + if (capabilities.currentExtent.width == std::numeric_limits::max()) { + extent.width = std::clamp(width, capabilities.minImageExtent.width, + capabilities.maxImageExtent.width); + extent.height = std::clamp(height, capabilities.minImageExtent.height, + capabilities.maxImageExtent.height); + } + + // Select number of images in swap chain, we prefer one buffer in the background to work on + image_count = capabilities.minImageCount + 1; + if (capabilities.maxImageCount > 0) { + image_count = std::min(image_count, capabilities.maxImageCount); + } + + // Prefer identity transform if possible + transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + if (!(capabilities.supportedTransforms & transform)) { + transform = capabilities.currentTransform; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h 
b/src/video_core/renderer_vulkan/vk_swapchain.h new file mode 100644 index 000000000..7cd331639 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -0,0 +1,101 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class CommandScheduler; +class RenderpassCache; + +class Swapchain { +public: + Swapchain(const Instance& instance, CommandScheduler& scheduler, + RenderpassCache& renderpass_cache,vk::SurfaceKHR surface); + ~Swapchain(); + + /// Creates (or recreates) the swapchain with a given size. + void Create(u32 width, u32 height, bool vsync_enabled); + + /// Acquires the next image in the swapchain. + void AcquireNextImage(); + + /// Presents the current image and move to the next one + void Present(); + + /// Returns current swapchain state + vk::Extent2D GetExtent() const { + return extent; + } + + /// Returns the swapchain surface + vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Returns the swapchain format + vk::SurfaceFormatKHR GetSurfaceFormat() const { + return surface_format; + } + + /// Returns the Vulkan swapchain handle + vk::SwapchainKHR GetHandle() const { + return swapchain; + } + + /// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes + vk::Semaphore GetAvailableSemaphore() const { + return image_available; + } + + /// Returns the semaphore that will signal when the current image will be presented + vk::Semaphore GetPresentSemaphore() const { + return render_finished; + } + + /// Returns true when the swapchain should be recreated + bool NeedsRecreation() const { + return is_suboptimal || is_outdated; + } + +private: + void Configure(u32 width, u32 height); + +private: + const Instance& instance; + CommandScheduler& scheduler; + RenderpassCache& 
renderpass_cache; + vk::SwapchainKHR swapchain{}; + vk::SurfaceKHR surface{}; + + // Swapchain properties + vk::SurfaceFormatKHR surface_format; + vk::PresentModeKHR present_mode; + vk::Extent2D extent; + vk::SurfaceTransformFlagBitsKHR transform; + u32 image_count; + + struct Image { + vk::Image image; + vk::ImageView image_view; + vk::Framebuffer framebuffer; + }; + + // Swapchain state + std::vector swapchain_images; + vk::Semaphore image_available{}; + vk::Semaphore render_finished{}; + u32 current_image = 0; + u32 current_frame = 0; + bool vsync_enabled = false; + bool is_outdated = true; + bool is_suboptimal = true; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp new file mode 100644 index 000000000..d7c039189 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -0,0 +1,178 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} { + + vk::Device device = instance.GetDevice(); + const vk::CommandPoolCreateInfo command_pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex() + }; + + command_pool = device.createCommandPool(command_pool_info); + + constexpr std::array pool_sizes = { + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024} + }; + + const vk::DescriptorPoolCreateInfo descriptor_pool_info = { + .maxSets = 2048, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data() + }; + + const vk::CommandBufferAllocateInfo buffer_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT + }; + + const auto command_buffers = device.allocateCommandBuffers(buffer_info); + for (std::size_t i = 0; i < commands.size(); i++) { + commands[i] = ExecutionSlot{ + .fence = device.createFence({}), + .descriptor_pool = device.createDescriptorPool(descriptor_pool_info), + .render_command_buffer = command_buffers[2 * i], + .upload_command_buffer = command_buffers[2 * i + 1], + }; + } + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + // Begin first command + auto& command = commands[current_command]; + command.render_command_buffer.begin(begin_info); + 
command.fence_counter = next_fence_counter++; +} + +TaskScheduler::~TaskScheduler() { + // Submit any remaining work + Submit(true, false); + + vk::Device device = instance.GetDevice(); + for (const auto& command : commands) { + device.destroyFence(command.fence); + device.destroyDescriptorPool(command.descriptor_pool); + } + + device.destroyCommandPool(command_pool); +} + +void TaskScheduler::Synchronize(u32 slot) { + const auto& command = commands[slot]; + vk::Device device = instance.GetDevice(); + + if (command.fence_counter > completed_fence_counter) { + if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fences failed!"); + } + + completed_fence_counter = command.fence_counter; + } + + device.resetFences(command.fence); + device.resetDescriptorPool(command.descriptor_pool); +} + +void TaskScheduler::WaitFence(u32 counter) { + for (u32 i = 0; i < SCHEDULER_COMMAND_COUNT; i++) { + if (commands[i].fence_counter == counter) { + return Synchronize(i); + } + } + + UNREACHABLE_MSG("Invalid fence counter!"); +} + +void TaskScheduler::Submit(bool wait_completion, bool begin_next, + vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) { + const auto& command = commands[current_command]; + command.render_command_buffer.end(); + if (command.use_upload_buffer) { + command.upload_command_buffer.end(); + } + + u32 command_buffer_count = 0; + std::array command_buffers; + + if (command.use_upload_buffer) { + command_buffers[command_buffer_count++] = command.upload_command_buffer; + } + + command_buffers[command_buffer_count++] = command.render_command_buffer; + + const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u; + const u32 wait_semaphore_count = wait_semaphore ? 
1u : 0u; + const vk::PipelineStageFlags wait_stage_masks = + vk::PipelineStageFlagBits::eColorAttachmentOutput; + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = &wait_stage_masks, + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = &signal_semaphore, + }; + + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info, command.fence); + + // Block host until the GPU catches up + if (wait_completion) { + Synchronize(current_command); + } + + // Switch to next cmdbuffer. + if (begin_next) { + SwitchSlot(); + } +} + +vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() { + auto& command = commands[current_command]; + if (!command.use_upload_buffer) { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + command.upload_command_buffer.begin(begin_info); + command.use_upload_buffer = true; + } + + return command.upload_command_buffer; +} + +void TaskScheduler::SwitchSlot() { + current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT; + auto& command = commands[current_command]; + + // Wait for the GPU to finish with all resources for this command. + Synchronize(current_command); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + // Begin the next command buffer. 
+ command.render_command_buffer.begin(begin_info); + command.fence_counter = next_fence_counter++; + command.use_upload_buffer = false; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h new file mode 100644 index 000000000..40310399a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -0,0 +1,82 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +constexpr u32 SCHEDULER_COMMAND_COUNT = 4; + +class Buffer; +class Instance; + +class TaskScheduler { +public: + TaskScheduler(const Instance& instance); + ~TaskScheduler(); + + /// Blocks the host until the current command completes execution + void Synchronize(u32 slot); + + /// Waits for the fence counter to be reached by the GPU + void WaitFence(u32 counter); + + /// Submits the current command to the graphics queue + void Submit(bool wait_completion = false, bool begin_next = true, + vk::Semaphore wait = VK_NULL_HANDLE, + vk::Semaphore signal = VK_NULL_HANDLE); + + /// Returns the command buffer used for early upload operations. 
+ vk::CommandBuffer GetUploadCommandBuffer(); + + /// Returns the command buffer used for rendering + vk::CommandBuffer GetRenderCommandBuffer() const { + return commands[current_command].render_command_buffer; + } + + /// Returns the current descriptor pool + vk::DescriptorPool GetDescriptorPool() const { + return commands[current_command].descriptor_pool; + } + + /// Returns the index of the current command slot + u32 GetCurrentSlotIndex() const { + return current_command; + } + + /// Returns the last completed fence counter + u64 GetFenceCounter() const { + return completed_fence_counter; + } + +private: + /// Activates the next command slot and optionally waits for its completion + void SwitchSlot(); + +private: + const Instance& instance; + u64 next_fence_counter = 1; + u64 completed_fence_counter = 0; + + struct ExecutionSlot { + bool use_upload_buffer = false; + u64 fence_counter = 0; + vk::Fence fence{}; + vk::DescriptorPool descriptor_pool; + vk::CommandBuffer render_command_buffer{}; + vk::CommandBuffer upload_command_buffer{}; + }; + + vk::CommandPool command_pool{}; + std::array commands; + u32 current_command = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp new file mode 100644 index 000000000..d2204233a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -0,0 +1,562 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Vulkan { + +vk::Format ToVkFormat(VideoCore::PixelFormat format) { + switch (format) { + case VideoCore::PixelFormat::RGBA8: + return vk::Format::eR8G8B8A8Unorm; + case VideoCore::PixelFormat::RGB8: + return vk::Format::eR8G8B8Unorm; + case VideoCore::PixelFormat::RGB5A1: + return vk::Format::eR5G5B5A1UnormPack16; + case VideoCore::PixelFormat::RGB565: + return vk::Format::eR5G6B5UnormPack16; + case VideoCore::PixelFormat::RGBA4: + return vk::Format::eR4G4B4A4UnormPack16; + case VideoCore::PixelFormat::D16: + return vk::Format::eD16Unorm; + case VideoCore::PixelFormat::D24: + return vk::Format::eX8D24UnormPack32; + case VideoCore::PixelFormat::D24S8: + return vk::Format::eD24UnormS8Uint; + case VideoCore::PixelFormat::Invalid: + LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format); + return vk::Format::eUndefined; + default: + // Use default case for the texture formats + return vk::Format::eR8G8B8A8Unorm; + } +} + +vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::ImageAspectFlagBits::eColor; + case VideoCore::SurfaceType::Depth: + return vk::ImageAspectFlagBits::eDepth; + case VideoCore::SurfaceType::DepthStencil: + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return vk::ImageAspectFlagBits::eColor; +} + +constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024; + +TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler) + : instance{instance}, scheduler{scheduler} { + + for (auto& buffer : staging_buffers) { + buffer 
= std::make_unique(instance, STAGING_BUFFER_SIZE, + vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst); + } +} + +StagingData TextureRuntime::FindStaging(u32 size, bool upload) { + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + const u32 offset = staging_offsets[current_slot]; + if (offset + size > STAGING_BUFFER_SIZE) { + LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!"); + UNREACHABLE(); + } + + const auto& buffer = staging_buffers[current_slot]; + return StagingData{ + .buffer = buffer->buffer, + .mapped = buffer->mapped.subspan(offset, size), + .buffer_offset = offset + }; +} + +void TextureRuntime::OnSlotSwitch(u32 new_slot) { + staging_offsets[new_slot] = 0; +} + +ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + VideoCore::TextureType type) { + + const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; + const VideoCore::HostTextureTag key = { + .format = format, + .width = width, + .height = height, + .layers = layers + }; + + // Attempt to recycle an unused allocation + if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { + ImageAlloc alloc = std::move(it->second); + texture_recycler.erase(it); + return alloc; + } + + // Create a new allocation + vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format)); + vk::ImageAspectFlags aspect = GetImageAspect(vk_format); + + const vk::ImageCreateInfo image_info = { + .flags = type == VideoCore::TextureType::CubeMap ? 
+ vk::ImageCreateFlagBits::eCubeCompatible : + vk::ImageCreateFlags{}, + .imageType = vk::ImageType::e2D, + .format = vk_format, + .extent = {width, height, 1}, + .mipLevels = std::bit_width(std::max(width, height)), + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = GetImageUsage(aspect), + }; + + const VmaAllocationCreateInfo alloc_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocation allocation; + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &allocation, nullptr); + if (result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + + vk::Image image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = type == VideoCore::TextureType::CubeMap ? + vk::ImageViewType::eCube : + vk::ImageViewType::e2D, + .format = vk_format, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }; + + vk::Device device = instance.GetDevice(); + vk::ImageView image_view = device.createImageView(view_info); + + return ImageAlloc{ + .image = image, + .image_view = image_view, + .allocation = allocation, + }; +} + +bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, clear.texture_level, 1); + + // For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage + if (clear.texture_rect == surface.GetScaledRect()) { + vk::ImageAspectFlags aspect = ToVkAspect(surface.type); + if (aspect & vk::ImageAspectFlagBits::eColor) { + const vk::ClearColorValue 
clear_color = { + .float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]}) + }; + + const vk::ImageSubresourceRange range = { + .aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }; + + command_buffer.clearColorImage(surface.image, vk::ImageLayout::eTransferDstOptimal, + clear_color, range); + } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) { + const vk::ClearDepthStencilValue clear_depth = { + .depth = value.depth, + .stencil = value.stencil + }; + + const vk::ImageSubresourceRange range = { + .aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }; + + command_buffer.clearDepthStencilImage(surface.image, vk::ImageLayout::eTransferDstOptimal, + clear_depth, range); + } + } else { + LOG_WARNING(Render_Vulkan, "Partial clears are unimplemented!"); + } + + return true; +} + +bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + const vk::ImageCopy image_copy = { + .srcSubresource = { + .aspectMask = ToVkAspect(source.type), + .mipLevel = copy.src_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), 0}, + .dstSubresource = { + .aspectMask = ToVkAspect(dest.type), + .mipLevel = copy.dst_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), 0}, + .extent = {copy.extent.width, copy.extent.height, 1} + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); + dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); + + command_buffer.copyImage(source.image, 
vk::ImageLayout::eTransferSrcOptimal, + dest.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + + return true; +} + +bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); + dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); + + const std::array source_offsets = { + vk::Offset3D{static_cast(blit.src_rect.left), static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), static_cast(blit.src_rect.top), 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{static_cast(blit.dst_rect.left), static_cast(blit.dst_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = ToVkAspect(source.type), + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = ToVkAspect(dest.type), + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(source.image, vk::ImageLayout::eTransferSrcOptimal, + dest.image, vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eLinear); + + return true; +} + +void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { + // TODO: Investigate AMD single pass downsampler + s32 current_width = surface.GetScaledWidth(); + s32 current_height = surface.GetScaledHeight(); + + const u32 levels = std::bit_width(std::max(surface.width, surface.height)); + vk::ImageAspectFlags aspect = ToVkAspect(surface.type); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + for (u32 i = 1; i < levels; i++) { + 
surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); + surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, i, 1); + + const std::array source_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width, current_height, 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width > 1 ? current_width / 2 : 1, + current_height > 1 ? current_height / 2 : 1, 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = i - 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = i, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(surface.image, vk::ImageLayout::eTransferSrcOptimal, + surface.image, vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eLinear); + } +} + +Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, + scheduler{runtime.GetScheduler()} { + const ImageAlloc alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), + params.pixel_format, texture_type); + + allocation = alloc.allocation; + image_view = alloc.image_view; + image = alloc.image; +} + +MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Upload); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledUpload(upload); + } else { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = upload.texture_rect; + const vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = 
rect.GetHeight(), + .imageSubresource = { + .aspectMask = aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); + command_buffer.copyBufferToImage(staging.buffer, image, + vk::ImageLayout::eTransferDstOptimal, + copy_region); + } + + InvalidateAllWatcher(); +} + +MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64)); +void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Download); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledDownload(download); + } else { + u32 region_count = 0; + std::array copy_regions; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = download.texture_rect; + vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = { + .aspectMask = aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + if (aspect & vk::ImageAspectFlagBits::eColor) { + copy_regions[region_count++] = copy_region; + } else if (aspect & vk::ImageAspectFlagBits::eDepth) { + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + copy_regions[region_count++] = copy_region; + + if (aspect & vk::ImageAspectFlagBits::eStencil) { + copy_region.bufferOffset += staging.mapped.size(); + copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil; + copy_regions[region_count++] = copy_region; + } + } + + 
TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); + + // Copy pixel data to the staging buffer + command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, + staging.buffer, region_count, copy_regions.data()); + + scheduler.Submit(true); + } +} + +void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { + /*const u32 rect_width = download.texture_rect.GetWidth(); + const u32 rect_height = download.texture_rect.GetHeight(); + + // Allocate an unscaled texture that fits the download rectangle to use as a blit destination + const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, + VideoCore::TextureType::Texture2D); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture); + + // Blit the scaled rectangle to the unscaled texture + const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; + glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top, + 0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + const auto& tuple = runtime.GetFormatTuple(pixel_format); + if (driver.IsOpenGLES()) { + const auto& downloader_es = runtime.GetDownloaderES(); + downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect_height, rect_width, + reinterpret_cast(download.buffer_offset)); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + reinterpret_cast(download.buffer_offset)); + }*/ +} + +void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { + /*const u32 rect_width = upload.texture_rect.GetWidth(); + const u32 rect_height = upload.texture_rect.GetHeight(); + + OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, 
pixel_format, + VideoCore::TextureType::Texture2D); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, + tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); + + const auto scaled_rect = upload.texture_rect * res_scale; + const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; + const auto& filterer = runtime.GetFilterer(); + if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) { + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture); + + // If filtering fails, resort to normal blitting + glBlitFramebuffer(0, 0, rect_width, rect_height, + upload.texture_rect.left, upload.texture_rect.bottom, + upload.texture_rect.right, upload.texture_rect.top, + MakeBufferMask(type), GL_LINEAR); + }*/ +} + +void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, + u32 level, u32 level_count) { + if (new_layout == layout) { + return; + } + + struct LayoutInfo { + vk::AccessFlags access; + vk::PipelineStageFlags stage; + }; + + // Get optimal transition settings for every image layout. Settings taken from Dolphin + auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo { + LayoutInfo info; + switch (layout) { + case vk::ImageLayout::eUndefined: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + info.access = vk::AccessFlagBits::eNone; + info.stage = vk::PipelineStageFlagBits::eTopOfPipe; + break; + case vk::ImageLayout::ePreinitialized: + // Image has been pre-initialized by the host, so ensure all writes have completed. 
+ info.access = vk::AccessFlagBits::eHostWrite; + info.stage = vk::PipelineStageFlagBits::eHost; + break; + case vk::ImageLayout::eColorAttachmentOptimal: + // Image was being used as a color attachment, so ensure all writes have completed. + info.access = vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite; + info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput; + break; + case vk::ImageLayout::eDepthStencilAttachmentOptimal: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + break; + case vk::ImageLayout::ePresentSrcKHR: + info.access = vk::AccessFlagBits::eNone; + info.stage = vk::PipelineStageFlagBits::eBottomOfPipe; + break; + case vk::ImageLayout::eShaderReadOnlyOptimal: + // Image was being used as a shader resource, make sure all reads have finished. + info.access = vk::AccessFlagBits::eShaderRead; + info.stage = vk::PipelineStageFlagBits::eFragmentShader; + break; + case vk::ImageLayout::eTransferSrcOptimal: + // Image was being used as a copy source, ensure all reads have finished. + info.access = vk::AccessFlagBits::eTransferRead; + info.stage = vk::PipelineStageFlagBits::eTransfer; + break; + case vk::ImageLayout::eTransferDstOptimal: + // Image was being used as a copy destination, ensure all writes have finished. 
+ info.access = vk::AccessFlagBits::eTransferWrite; + info.stage = vk::PipelineStageFlagBits::eTransfer; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); + UNREACHABLE(); + } + + return info; + }; + + LayoutInfo source = GetLayoutInfo(layout); + LayoutInfo dest = GetLayoutInfo(new_layout); + + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = source.access, + .dstAccessMask = dest.access, + .oldLayout = layout, + .newLayout = new_layout, + .image = image, + .subresourceRange = {aspect, level, level_count, 0, 1} + }; + + command_buffer.pipelineBarrier(source.stage, dest.stage, + vk::DependencyFlagBits::eByRegion, + {}, {}, barrier); + + layout = new_layout; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h new file mode 100644 index 000000000..79b62949b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -0,0 +1,129 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once +#include +#include +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/rasterizer_cache/types.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" + +namespace Vulkan { + +struct StagingData { + vk::Buffer buffer; + std::span mapped{}; + u32 buffer_offset = 0; +}; + +struct ImageAlloc { + vk::Image image; + vk::ImageView image_view; + VmaAllocation allocation; +}; + +class Instance; +class Surface; + +/** + * Provides texture manipulation functions to the rasterizer cache + * Separating this into a class makes it easier to abstract graphics API code + */ +class TextureRuntime { + friend class Surface; +public: + TextureRuntime(const Instance& instance, TaskScheduler& scheduler); + ~TextureRuntime() = default; + + /// Maps an internal staging buffer of the provided size of pixel uploads/downloads + StagingData FindStaging(u32 size, bool upload); + + /// Performs operations that need to be done on every scheduler slot switch + void OnSlotSwitch(u32 new_slot); + + /// Fills the rectangle of the texture with the clear value provided + bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value); + + /// Copies a rectangle of src_tex to another rectange of dst_rect + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + + /// Blits a rectangle of src_tex to another rectange of dst_rect + bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); + + /// Generates mipmaps for all the available levels of the texture + void GenerateMipmaps(Surface& surface, u32 max_level); + +private: + /// Allocates a vulkan image possibly resusing an existing one + ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + VideoCore::TextureType type); + + /// Returns the current Vulkan instance + 
const Instance& GetInstance() const { + return instance; + } + + /// Returns the current Vulkan scheduler + TaskScheduler& GetScheduler() const { + return scheduler; + } + +private: + const Instance& instance; + TaskScheduler& scheduler; + std::array, SCHEDULER_COMMAND_COUNT> staging_buffers; + std::array staging_offsets{}; + std::unordered_map texture_recycler; +}; + +class Surface : public VideoCore::SurfaceBase { + friend class TextureRuntime; +public: + Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); + ~Surface() override = default; + + /// Uploads pixel data in staging to a rectangle region of the surface texture + void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + + /// Downloads pixel data to staging from a rectangle region of the surface texture + void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); + +private: + /// Downloads scaled image by downscaling the requested rectangle + void ScaledDownload(const VideoCore::BufferTextureCopy& download); + + /// Uploads pixel data to scaled texture + void ScaledUpload(const VideoCore::BufferTextureCopy& upload); + + /// Overrides the image layout of the mip level range + void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); + + /// Transitions the mip level range of the surface to new_layout + void TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, + u32 level, u32 level_count); + +private: + TextureRuntime& runtime; + const Instance& instance; + TaskScheduler& scheduler; + + vk::Image image{}; + vk::ImageView image_view{}; + VmaAllocation allocation = nullptr; + vk::Format internal_format = vk::Format::eUndefined; + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; +}; + +struct Traits { + using Runtime = TextureRuntime; + using Surface = Surface; +}; + +using RasterizerCache = VideoCore::RasterizerCache; 
+ +} // namespace Vulkan diff --git a/src/video_core/shader/shader_cache.h b/src/video_core/shader/shader_cache.h new file mode 100644 index 000000000..ba3f703f8 --- /dev/null +++ b/src/video_core/shader/shader_cache.h @@ -0,0 +1,97 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "video_core/shader/shader.h" + +namespace Pica::Shader { + +template +using ShaderCacheResult = std::pair>; + +template +class ShaderCache { +public: + ShaderCache() {} + ~ShaderCache() = default; + + /// Returns a shader handle generated from the provided config + template + auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult { + auto [iter, new_shader] = shaders.emplace(config, ShaderType{}); + auto& shader = iter->second; + + if (new_shader) { + std::string code = CodeGenerator(config); + shader = ModuleCompiler(code, args...); + return std::make_pair(shader, code); + } + + return std::make_pair(shader, std::nullopt); + } + + void Inject(const KeyType& key, ShaderType&& shader) { + shaders.emplace(key, std::move(shader)); + } + +private: + std::unordered_map shaders; +}; + +/** + * This is a cache designed for shaders translated from PICA shaders. The first cache matches the + * config structure like a normal cache does. On cache miss, the second cache matches the generated + * GLSL code. The configuration is like this because there might be leftover code in the PICA shader + * program buffer from the previous shader, which is hashed into the config, resulting several + * different config values from the same shader program. + */ +template (*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)> +class ShaderDoubleCache { +public: + ShaderDoubleCache() = default; + ~ShaderDoubleCache() = default; + + template + auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... 
args) -> ShaderCacheResult { + if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) { + auto code = CodeGenerator(setup, key); + if (!code) { + shader_map[key] = nullptr; + return std::make_pair(ShaderType{}, std::nullopt); + } + + std::string& program = code.value(); + auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{}); + auto& shader = iter->second; + + if (new_shader) { + shader = ModuleCompiler(program, args...); + } + + shader_map[key] = &shader; + return std::make_pair(shader, std::move(program)); + } else { + return std::make_pair(*map_iter->second, std::nullopt); + } + } + + void Inject(const KeyType& key, std::string decomp, ShaderType&& program) { + const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first; + + auto& cached_shader = iter->second; + shader_map.insert_or_assign(key, &cached_shader); + } + +private: + std::unordered_map shader_map; + std::unordered_map shader_cache; +}; + +} // namespace Pica::Shader