renderer_vulkan: Add experimental Vulkan renderer
This commit is contained in:
6
.gitmodules
vendored
6
.gitmodules
vendored
@ -61,3 +61,9 @@
|
||||
[submodule "vulkan-headers"]
|
||||
path = externals/vulkan-headers
|
||||
url = https://github.com/KhronosGroup/Vulkan-Headers
|
||||
[submodule "glslang"]
|
||||
path = externals/glslang
|
||||
url = https://github.com/KhronosGroup/glslang
|
||||
[submodule "glm"]
|
||||
path = externals/glm
|
||||
url = https://github.com/g-truc/glm
|
||||
|
@ -9,6 +9,7 @@ cmake_policy(SET CMP0069 NEW)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
include(DownloadExternals)
|
||||
include(GNUInstallDirs)
|
||||
include(CMakeDependentOption)
|
||||
|
||||
project(citra LANGUAGES C CXX ASM)
|
||||
|
10
externals/CMakeLists.txt
vendored
10
externals/CMakeLists.txt
vendored
@ -60,6 +60,16 @@ endif()
|
||||
# Glad
|
||||
add_subdirectory(glad)
|
||||
|
||||
# glslang
|
||||
set(SKIP_GLSLANG_INSTALL ON)
|
||||
set(ENABLE_GLSLANG_BINARIES OFF)
|
||||
set(ENABLE_SPVREMAPPER OFF)
|
||||
set(ENABLE_CTEST OFF)
|
||||
add_subdirectory(glslang)
|
||||
|
||||
# glm
|
||||
add_subdirectory(glm)
|
||||
|
||||
# inih
|
||||
add_subdirectory(inih)
|
||||
|
||||
|
1
externals/glm
vendored
Submodule
1
externals/glm
vendored
Submodule
Submodule externals/glm added at cc98465e35
1
externals/glslang
vendored
Submodule
1
externals/glslang
vendored
Submodule
Submodule externals/glslang added at c0cf8ad876
@ -122,6 +122,7 @@ else()
|
||||
|
||||
if (MINGW)
|
||||
add_definitions(-DMINGW_HAS_SECURE_API)
|
||||
add_compile_options("-Wa,-mbig-obj")
|
||||
if (COMPILE_WITH_DWARF)
|
||||
add_compile_options("-gdwarf")
|
||||
endif()
|
||||
|
@ -269,6 +269,10 @@ target_link_libraries(citra-qt PRIVATE audio_core common core input_common netwo
|
||||
target_link_libraries(citra-qt PRIVATE Boost::boost glad nihstro-headers Qt5::Widgets Qt5::Multimedia)
|
||||
target_link_libraries(citra-qt PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
|
||||
|
||||
if (NOT WIN32)
|
||||
target_include_directories(citra-qt PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
target_compile_definitions(citra-qt PRIVATE
|
||||
# Use QStringBuilder for string concatenation to reduce
|
||||
# the overall number of temporary strings created.
|
||||
|
@ -25,6 +25,10 @@
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
#if !defined(WIN32)
|
||||
#include <qpa/qplatformnativeinterface.h>
|
||||
#endif
|
||||
|
||||
EmuThread::EmuThread(Frontend::GraphicsContext& core_context) : core_context(core_context) {}
|
||||
|
||||
EmuThread::~EmuThread() = default;
|
||||
@ -53,6 +57,7 @@ void EmuThread::run() {
|
||||
});
|
||||
|
||||
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
|
||||
emit HideLoadingScreen();
|
||||
|
||||
core_context.MakeCurrent();
|
||||
|
||||
@ -303,6 +308,40 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
static Frontend::WindowSystemType GetWindowSystemType() {
|
||||
// Determine WSI type based on Qt platform.
|
||||
QString platform_name = QGuiApplication::platformName();
|
||||
if (platform_name == QStringLiteral("windows"))
|
||||
return Frontend::WindowSystemType::Windows;
|
||||
else if (platform_name == QStringLiteral("xcb"))
|
||||
return Frontend::WindowSystemType::X11;
|
||||
else if (platform_name == QStringLiteral("wayland"))
|
||||
return Frontend::WindowSystemType::Wayland;
|
||||
|
||||
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
|
||||
return Frontend::WindowSystemType::Windows;
|
||||
}
|
||||
|
||||
static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
|
||||
Frontend::EmuWindow::WindowSystemInfo wsi;
|
||||
wsi.type = GetWindowSystemType();
|
||||
|
||||
// Our Win32 Qt external doesn't have the private API.
|
||||
#if defined(WIN32) || defined(__APPLE__)
|
||||
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
|
||||
#else
|
||||
QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
|
||||
wsi.display_connection = pni->nativeResourceForWindow("display", window);
|
||||
if (wsi.type == Frontend::WindowSystemType::Wayland)
|
||||
wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
|
||||
else
|
||||
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
|
||||
#endif
|
||||
wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
|
||||
|
||||
return wsi;
|
||||
}
|
||||
|
||||
GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread, bool is_secondary_)
|
||||
: QWidget(parent_), EmuWindow(is_secondary_), emu_thread(emu_thread) {
|
||||
|
||||
@ -539,6 +578,9 @@ bool GRenderWindow::InitRenderTarget() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Update the Window System information with the new render target
|
||||
window_info = GetWindowSystemInfo(child_widget->windowHandle());
|
||||
|
||||
child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight);
|
||||
|
||||
layout()->addWidget(child_widget);
|
||||
|
@ -76,6 +76,7 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
ui->toggle_accurate_mul->setChecked(Settings::values.shaders_accurate_mul.GetValue());
|
||||
ui->toggle_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
|
||||
ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new.GetValue());
|
||||
ui->graphics_api_combo->setCurrentIndex(static_cast<int>(Settings::values.graphics_api.GetValue()));
|
||||
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue());
|
||||
@ -95,6 +96,7 @@ void ConfigureGraphics::ApplyConfiguration() {
|
||||
ui->toggle_disk_shader_cache, use_disk_shader_cache);
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync_new, ui->toggle_vsync_new,
|
||||
use_vsync_new);
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.graphics_api, ui->graphics_api_combo);
|
||||
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked();
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <concepts>
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) {
|
||||
return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||
}
|
||||
|
||||
template <std::integral T>
|
||||
struct IdentityHash {
|
||||
T operator()(const T& value) const {
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
||||
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
|
||||
template <typename T>
|
||||
struct HashableStruct {
|
||||
|
@ -236,6 +236,7 @@ void DebuggerBackend::Write(const Entry& entry) {
|
||||
CLS(Render) \
|
||||
SUB(Render, Software) \
|
||||
SUB(Render, OpenGL) \
|
||||
SUB(Render, Vulkan) \
|
||||
CLS(Audio) \
|
||||
SUB(Audio, DSP) \
|
||||
SUB(Audio, Sink) \
|
||||
|
@ -4,8 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <fmt/format.h>
|
||||
#include <type_traits>
|
||||
|
||||
// adapted from https://github.com/fmtlib/fmt/issues/2704
|
||||
// a generic formatter for enum classes
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <array>
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/formatter.h"
|
||||
|
||||
namespace Log {
|
||||
|
||||
// trims up to and including the last of ../, ..\, src/, src\ in a string
|
||||
@ -103,6 +104,7 @@ enum class Class : ClassType {
|
||||
Render, ///< Emulator video output and hardware acceleration
|
||||
Render_Software, ///< Software renderer backend
|
||||
Render_OpenGL, ///< OpenGL backend
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
Audio, ///< Audio emulation
|
||||
Audio_DSP, ///< The HLE and LLE implementations of the DSP
|
||||
Audio_Sink, ///< Emulator audio output backend
|
||||
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <compare>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Common {
|
||||
|
@ -446,7 +446,7 @@ struct Values {
|
||||
Setting<bool> allow_plugin_loader{true, "allow_plugin_loader"};
|
||||
|
||||
// Renderer
|
||||
SwitchableSetting<GraphicsAPI> graphics_api{GraphicsAPI::OpenGL, "graphics_api"};
|
||||
SwitchableSetting<GraphicsAPI> graphics_api{GraphicsAPI::Vulkan, "graphics_api"};
|
||||
SwitchableSetting<bool> use_hw_renderer{true, "use_hw_renderer"};
|
||||
SwitchableSetting<bool> use_hw_shader{true, "use_hw_shader"};
|
||||
SwitchableSetting<bool> separable_shader{false, "use_separable_shader"};
|
||||
|
@ -14,6 +14,15 @@
|
||||
|
||||
namespace Frontend {
|
||||
|
||||
/// Information for the Graphics Backends signifying what type of screen pointer is in
|
||||
/// WindowInformation
|
||||
enum class WindowSystemType : u8 {
|
||||
Headless,
|
||||
Windows,
|
||||
X11,
|
||||
Wayland,
|
||||
};
|
||||
|
||||
struct Frame;
|
||||
/**
|
||||
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
|
||||
@ -122,6 +131,23 @@ public:
|
||||
Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight};
|
||||
};
|
||||
|
||||
/// Data describing host window system information
|
||||
struct WindowSystemInfo {
|
||||
// Window system type. Determines which GL context or Vulkan WSI is used.
|
||||
WindowSystemType type = WindowSystemType::Headless;
|
||||
|
||||
// Connection to a display server. This is used on X11 and Wayland platforms.
|
||||
void* display_connection = nullptr;
|
||||
|
||||
// Render surface. This is a pointer to the native window handle, which depends
|
||||
// on the platform. e.g. HWND for Windows, Window for X11. If the surface is
|
||||
// set to nullptr, the video backend will run in headless mode.
|
||||
void* render_surface = nullptr;
|
||||
|
||||
// Scale of the render surface. For hidpi systems, this will be >1.
|
||||
float render_surface_scale = 1.0f;
|
||||
};
|
||||
|
||||
/// Polls window events
|
||||
virtual void PollEvents() = 0;
|
||||
|
||||
@ -185,6 +211,13 @@ public:
|
||||
config = val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns system information about the drawing area.
|
||||
*/
|
||||
const WindowSystemInfo& GetWindowInfo() const {
|
||||
return window_info;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the framebuffer layout (width, height, and screen regions)
|
||||
* @note This method is thread-safe
|
||||
@ -233,6 +266,7 @@ protected:
|
||||
}
|
||||
|
||||
bool is_secondary{};
|
||||
WindowSystemInfo window_info;
|
||||
|
||||
private:
|
||||
/**
|
||||
|
@ -595,33 +595,6 @@ bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const {
|
||||
return GetPhysicalRef(paddr);
|
||||
}
|
||||
|
||||
PAddr MemorySystem::ClampPhysicalAddress(PAddr base, PAddr address) const {
|
||||
struct MemoryArea {
|
||||
PAddr paddr_base;
|
||||
u32 size;
|
||||
};
|
||||
|
||||
constexpr std::array memory_areas = {
|
||||
MemoryArea{VRAM_PADDR, VRAM_SIZE},
|
||||
MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE},
|
||||
MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE},
|
||||
MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
|
||||
};
|
||||
|
||||
const auto area =
|
||||
std::ranges::find_if(memory_areas, [&](const MemoryArea& area) {
|
||||
return base >= area.paddr_base && base <= area.paddr_base + area.size;
|
||||
});
|
||||
|
||||
if (area == memory_areas.end()) {
|
||||
LOG_ERROR(HW_Memory, "Unknown base address used for clamping {:#08X} at PC {:#08X}", base,
|
||||
Core::GetRunningCore().GetPC());
|
||||
return address;
|
||||
}
|
||||
|
||||
return std::clamp(address, area->paddr_base, area->paddr_base + area->size);
|
||||
}
|
||||
|
||||
u8* MemorySystem::GetPointer(const VAddr vaddr) {
|
||||
u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS];
|
||||
if (page_pointer) {
|
||||
|
@ -587,9 +587,6 @@ public:
|
||||
/// Returns true if the address refers to a valid memory region
|
||||
bool IsValidPhysicalAddress(PAddr paddr) const;
|
||||
|
||||
/// Clamps the address to the boundaries of the memory region pointed by base
|
||||
PAddr ClampPhysicalAddress(PAddr base, PAddr address) const;
|
||||
|
||||
/// Gets offset in FCRAM from a pointer inside FCRAM range
|
||||
u32 GetFCRAMOffset(const u8* pointer) const;
|
||||
|
||||
|
@ -85,11 +85,41 @@ add_library(video_core STATIC
|
||||
#temporary, move these back in alphabetical order before merging
|
||||
renderer_opengl/gl_format_reinterpreter.cpp
|
||||
renderer_opengl/gl_format_reinterpreter.h
|
||||
renderer_vulkan/pica_to_vk.h
|
||||
renderer_vulkan/renderer_vulkan.cpp
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_instance.cpp
|
||||
renderer_vulkan/vk_instance.h
|
||||
renderer_vulkan/vk_pipeline_cache.cpp
|
||||
renderer_vulkan/vk_pipeline_cache.h
|
||||
renderer_vulkan/vk_platform.cpp
|
||||
renderer_vulkan/vk_platform.h
|
||||
renderer_vulkan/vk_renderpass_cache.cpp
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader.cpp
|
||||
renderer_vulkan/vk_shader.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h
|
||||
renderer_vulkan/vk_swapchain.cpp
|
||||
renderer_vulkan/vk_swapchain.h
|
||||
renderer_vulkan/vk_task_scheduler.cpp
|
||||
renderer_vulkan/vk_task_scheduler.h
|
||||
renderer_vulkan/vk_texture_runtime.cpp
|
||||
renderer_vulkan/vk_texture_runtime.h
|
||||
shader/debug_data.h
|
||||
shader/shader.cpp
|
||||
shader/shader.h
|
||||
shader/shader_cache.h
|
||||
shader/shader_interpreter.cpp
|
||||
shader/shader_interpreter.h
|
||||
shader/shader_uniforms.cpp
|
||||
shader/shader_uniforms.h
|
||||
swrasterizer/clipper.cpp
|
||||
swrasterizer/clipper.h
|
||||
swrasterizer/framebuffer.cpp
|
||||
@ -160,8 +190,11 @@ endif()
|
||||
|
||||
create_target_directory_groups(video_core)
|
||||
|
||||
# Include Vulkan headers
|
||||
target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include)
|
||||
target_include_directories(video_core PRIVATE ../../externals/vma)
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization)
|
||||
target_link_libraries(video_core PRIVATE glad glm::glm SPIRV glslang nihstro-headers Boost::serialization)
|
||||
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
|
@ -40,7 +40,7 @@ void Zero(T& o) {
|
||||
State::State() : geometry_pipeline(*this) {
|
||||
auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
|
||||
using Pica::Shader::OutputVertex;
|
||||
auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
|
||||
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
|
||||
const OutputVertex& v2) {
|
||||
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
|
||||
};
|
||||
|
@ -136,7 +136,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp
|
||||
}
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format>
|
||||
static void MortonCopy(u32 stride, u32 height, u32 start_offset,
|
||||
static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> linear_buffer,
|
||||
std::span<std::byte> tiled_buffer) {
|
||||
|
||||
@ -148,7 +148,6 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
|
||||
// becomes zero for 4-bit textures!
|
||||
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
||||
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
|
||||
const u32 end_offset = start_offset + static_cast<u32>(tiled_buffer.size());
|
||||
|
||||
// Does this line have any significance?
|
||||
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
|
||||
@ -216,7 +215,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
|
||||
}
|
||||
}
|
||||
|
||||
using MortonFunc = void (*)(u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
|
||||
using MortonFunc = void (*)(u32, u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "common/logging/log.h"
|
||||
@ -46,9 +47,9 @@ class RasterizerAccelerated;
|
||||
template <class T>
|
||||
class RasterizerCache : NonCopyable {
|
||||
public:
|
||||
using TextureRuntime = typename T::Runtime;
|
||||
using Surface = std::shared_ptr<typename T::Surface>;
|
||||
using Watcher = SurfaceWatcher<typename T::Surface>;
|
||||
using TextureRuntime = typename T::RuntimeType;
|
||||
using Surface = std::shared_ptr<typename T::SurfaceType>;
|
||||
using Watcher = SurfaceWatcher<typename T::SurfaceType>;
|
||||
|
||||
private:
|
||||
/// Declare rasterizer interval types
|
||||
@ -754,7 +755,7 @@ auto RasterizerCache<T>::GetFillSurface(const GPU::Regs::MemoryFillConfig& confi
|
||||
params.type = SurfaceType::Fill;
|
||||
params.res_scale = std::numeric_limits<u16>::max();
|
||||
|
||||
Surface new_surface = std::make_shared<typename T::Surface>(params, runtime);
|
||||
Surface new_surface = std::make_shared<typename T::SurfaceType>(params, runtime);
|
||||
|
||||
std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4);
|
||||
if (config.fill_32bit) {
|
||||
@ -893,32 +894,23 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
|
||||
ASSERT(load_start >= surface->addr && load_end <= surface->end);
|
||||
|
||||
const auto& staging = runtime.FindStaging(
|
||||
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true);
|
||||
surface->width * surface->height * 4, true);
|
||||
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr);
|
||||
if (!source_ptr) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
|
||||
const u32 start_offset = load_start - surface->addr;
|
||||
const u32 upload_size = static_cast<u32>(upload_data.size());
|
||||
|
||||
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
|
||||
|
||||
if (!surface->is_tiled) {
|
||||
ASSERT(surface->type == SurfaceType::Color);
|
||||
|
||||
const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size);
|
||||
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
|
||||
Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer);
|
||||
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
|
||||
Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer);
|
||||
} else {
|
||||
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
|
||||
}*/
|
||||
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
|
||||
if (surface->is_tiled) {
|
||||
std::vector<std::byte> unswizzled_data(staging.size);
|
||||
UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr,
|
||||
upload_data, unswizzled_data);
|
||||
runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
|
||||
} else {
|
||||
UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped);
|
||||
runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
|
||||
}
|
||||
|
||||
const BufferTextureCopy upload = {
|
||||
@ -939,7 +931,7 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
||||
ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
|
||||
|
||||
const auto& staging = runtime.FindStaging(
|
||||
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false);
|
||||
surface->width * surface->height * 4, false);
|
||||
const SurfaceParams params = surface->FromInterval(interval);
|
||||
const BufferTextureCopy download = {
|
||||
.buffer_offset = 0,
|
||||
@ -956,25 +948,16 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
||||
}
|
||||
|
||||
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
|
||||
const u32 start_offset = flush_start - surface->addr;
|
||||
const u32 download_size = static_cast<u32>(download_dest.size());
|
||||
|
||||
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
|
||||
|
||||
if (!surface->is_tiled) {
|
||||
ASSERT(surface->type == SurfaceType::Color);
|
||||
|
||||
const auto download_data = staging.mapped.subspan(start_offset, download_size);
|
||||
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
|
||||
Pica::Texture::ConvertABGRToRGBA(download_data, download_dest);
|
||||
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
|
||||
Pica::Texture::ConvertBGRToRGB(download_data, download_dest);
|
||||
} else {
|
||||
std::memcpy(download_dest.data(), download_data.data(), download_size);
|
||||
}*/
|
||||
std::memcpy(download_dest.data(), download_data.data(), download_size);
|
||||
if (surface->is_tiled) {
|
||||
std::vector<std::byte> swizzled_data(staging.size);
|
||||
runtime.FormatConvert(surface->pixel_format, false, swizzled_data, swizzled_data);
|
||||
SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr,
|
||||
staging.mapped, download_dest);
|
||||
} else {
|
||||
SwizzleTexture(*surface, start_offset, staging.mapped, download_dest);
|
||||
runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1228,7 +1211,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, const Surface& r
|
||||
|
||||
template <class T>
|
||||
auto RasterizerCache<T>::CreateSurface(SurfaceParams& params) -> Surface {
|
||||
Surface surface = std::make_shared<typename T::Surface>(params, runtime);
|
||||
Surface surface = std::make_shared<typename T::SurfaceType>(params, runtime);
|
||||
surface->invalid_regions.insert(surface->GetInterval());
|
||||
|
||||
return surface;
|
||||
|
@ -3,30 +3,26 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <glad/glad.h>
|
||||
#include "common/assert.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
#include "video_core/rasterizer_cache/morton_swizzle.h"
|
||||
#include "video_core/rasterizer_cache/surface_params.h"
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
|
||||
SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled);
|
||||
SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled);
|
||||
}
|
||||
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
|
||||
UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled);
|
||||
UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled);
|
||||
}
|
||||
|
||||
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {
|
||||
@ -68,4 +64,4 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
} // namespace VideoCore
|
||||
|
@ -14,7 +14,6 @@ struct HostTextureTag {
|
||||
PixelFormat format{};
|
||||
u32 width = 0;
|
||||
u32 height = 0;
|
||||
u32 levels = 1;
|
||||
u32 layers = 1;
|
||||
|
||||
auto operator<=>(const HostTextureTag&) const noexcept = default;
|
||||
@ -45,7 +44,7 @@ class SurfaceParams;
|
||||
|
||||
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
|
||||
|
||||
/**
|
||||
@ -56,7 +55,7 @@ void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
* @param source_tiled The source morton swizzled data.
|
||||
* @param dest_linear The output buffer where the generated linear data will be written to.
|
||||
*/
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -9,10 +9,6 @@
|
||||
#include "common/common_types.h"
|
||||
#include "core/hw/gpu.h"
|
||||
|
||||
namespace OpenGL {
|
||||
struct ScreenInfo;
|
||||
}
|
||||
|
||||
namespace Pica::Shader {
|
||||
struct OutputVertex;
|
||||
} // namespace Pica::Shader
|
||||
@ -73,13 +69,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attempt to use a faster method to display the framebuffer to screen
|
||||
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
|
||||
PAddr framebuffer_addr, u32 pixel_stride,
|
||||
OpenGL::ScreenInfo& screen_info) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attempt to draw using hardware shaders
|
||||
virtual bool AccelerateDrawBatch(bool is_indexed) {
|
||||
return false;
|
||||
|
@ -159,6 +159,7 @@ struct FramebufferRegs {
|
||||
} stencil_test;
|
||||
|
||||
union {
|
||||
u32 depth_color_mask;
|
||||
BitField<0, 1, u32> depth_test_enable;
|
||||
BitField<4, 3, CompareFunc> depth_test_func;
|
||||
BitField<8, 1, u32> red_enable;
|
||||
|
@ -6,8 +6,7 @@
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/pica_types.h"
|
||||
|
||||
namespace Pica {
|
||||
@ -18,7 +17,7 @@ struct RasterizerRegs {
|
||||
KeepAll = 0,
|
||||
KeepClockWise = 1,
|
||||
KeepCounterClockWise = 2,
|
||||
// TODO: What does the third value imply?
|
||||
KeepAll2 = 3
|
||||
};
|
||||
|
||||
union {
|
||||
|
@ -21,7 +21,3 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
|
||||
update_layout(*secondary_window);
|
||||
}
|
||||
}
|
||||
|
||||
void RendererBase::Sync() {
|
||||
rasterizer->SyncEntireState();
|
||||
}
|
||||
|
@ -21,6 +21,9 @@ public:
|
||||
/// Initialize the renderer
|
||||
virtual VideoCore::ResultStatus Init() = 0;
|
||||
|
||||
/// Returns the rasterizer owned by the renderer
|
||||
virtual VideoCore::RasterizerInterface* Rasterizer() = 0;
|
||||
|
||||
/// Shutdown the renderer
|
||||
virtual void ShutDown() = 0;
|
||||
|
||||
@ -40,6 +43,8 @@ public:
|
||||
/// Cleans up after video dumping is ended
|
||||
virtual void CleanupVideoDumping() = 0;
|
||||
|
||||
virtual void Sync() = 0;
|
||||
|
||||
/// Updates the framebuffer layout of the contained render window handle.
|
||||
void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {});
|
||||
|
||||
@ -54,10 +59,6 @@ public:
|
||||
return m_current_frame;
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface* Rasterizer() const {
|
||||
return rasterizer.get();
|
||||
}
|
||||
|
||||
Frontend::EmuWindow& GetRenderWindow() {
|
||||
return render_window;
|
||||
}
|
||||
@ -66,12 +67,9 @@ public:
|
||||
return render_window;
|
||||
}
|
||||
|
||||
void Sync();
|
||||
|
||||
protected:
|
||||
Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
|
||||
Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle.
|
||||
std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
|
||||
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
|
||||
int m_current_frame = 0; ///< Current frame, should be set by the renderer
|
||||
};
|
||||
|
@ -112,14 +112,15 @@ void Driver::ReportDriverInfo() {
|
||||
}
|
||||
|
||||
void Driver::DeduceVendor() {
|
||||
if (gpu_vendor.contains("NVIDIA")) {
|
||||
if (gpu_vendor.find("NVIDIA") != gpu_vendor.npos) {
|
||||
vendor = Vendor::Nvidia;
|
||||
} else if (gpu_vendor.contains("ATI") ||
|
||||
gpu_vendor.contains("Advanced Micro Devices")) {
|
||||
} else if ((gpu_vendor.find("ATI") != gpu_vendor.npos) ||
|
||||
(gpu_vendor.find("AMD") != gpu_vendor.npos) ||
|
||||
(gpu_vendor.find("Advanced Micro Devices") != gpu_vendor.npos)) {
|
||||
vendor = Vendor::AMD;
|
||||
} else if (gpu_vendor.contains("Intel")) {
|
||||
} else if (gpu_vendor.find("Intel") != gpu_vendor.npos) {
|
||||
vendor = Vendor::Intel;
|
||||
} else if (gpu_vendor.contains("GDI Generic")) {
|
||||
} else if (gpu_vendor.find("GDI Generic") != gpu_vendor.npos) {
|
||||
vendor = Vendor::Generic;
|
||||
}
|
||||
}
|
||||
|
@ -243,17 +243,12 @@ private:
|
||||
};
|
||||
|
||||
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
|
||||
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
|
||||
const std::string_view version{reinterpret_cast<const char*>(glGetString(GL_VERSION))};
|
||||
|
||||
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||
const u32 dst_index = static_cast<u32>(dest);
|
||||
return reinterpreters[dst_index].push_back(std::move(obj));
|
||||
};
|
||||
|
||||
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
|
||||
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
|
||||
|
||||
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,9 @@ class EmuWindow;
|
||||
}
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Driver;
|
||||
class ShaderProgramManager;
|
||||
|
||||
@ -43,7 +46,7 @@ public:
|
||||
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
|
||||
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
|
||||
u32 pixel_stride, ScreenInfo& screen_info) override;
|
||||
u32 pixel_stride, ScreenInfo& screen_info);
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
|
||||
/// Syncs entire status to match PICA registers
|
||||
|
@ -41,10 +41,12 @@ struct LightSrc {
|
||||
float dist_atten_scale;
|
||||
};
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
|
||||
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
// Not following that rule will cause problems on some AMD drivers.
|
||||
/**
|
||||
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||
* NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at
|
||||
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
* Not following that rule will cause problems on some AMD drivers.
|
||||
*/
|
||||
struct UniformData {
|
||||
int framebuffer_scale;
|
||||
int alphatest_ref;
|
||||
@ -81,8 +83,10 @@ static_assert(sizeof(UniformData) == 0x4F0,
|
||||
static_assert(sizeof(UniformData) < 16384,
|
||||
"UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
|
||||
// NOTE: the same rule from UniformData also applies here.
|
||||
/**
|
||||
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
|
||||
* NOTE: the same rule from UniformData also applies here.
|
||||
*/
|
||||
struct PicaUniformsData {
|
||||
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
|
@ -124,6 +124,17 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
|
||||
return DEFAULT_TUPLE;
|
||||
}
|
||||
|
||||
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
|
||||
std::span<std::byte> source, std::span<std::byte> dest) {
|
||||
if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
|
||||
Pica::Texture::ConvertABGRToRGBA(source, dest);
|
||||
} else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
|
||||
Pica::Texture::ConvertBGRToRGB(source, dest);
|
||||
} else {
|
||||
std::memcpy(dest.data(), source.data(), source.size());
|
||||
}
|
||||
}
|
||||
|
||||
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type) {
|
||||
|
||||
@ -302,9 +313,20 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
|
||||
texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
Surface::~Surface() {
|
||||
const VideoCore::HostTextureTag tag = {
|
||||
.format = pixel_format,
|
||||
.width = GetScaledWidth(),
|
||||
.height = GetScaledHeight(),
|
||||
.layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u
|
||||
};
|
||||
|
||||
runtime.texture_recycler.emplace(tag, std::move(texture));
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
|
||||
MICROPROFILE_SCOPE(OpenGL_Upload);
|
||||
|
||||
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
|
||||
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
|
||||
@ -327,8 +349,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
|
||||
upload.texture_rect.left, upload.texture_rect.bottom,
|
||||
upload.texture_rect.GetWidth(),
|
||||
upload.texture_rect.GetHeight(),
|
||||
tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(upload.buffer_offset));
|
||||
tuple.format, tuple.type, 0);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
@ -339,9 +360,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
|
||||
InvalidateAllWatcher();
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64));
|
||||
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
|
||||
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
|
||||
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
|
||||
MICROPROFILE_SCOPE(OpenGL_Download);
|
||||
|
||||
// Ensure no bad interactions with GL_PACK_ALIGNMENT
|
||||
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
|
||||
@ -361,7 +382,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
|
||||
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
|
||||
tuple.format, tuple.type, reinterpret_cast<void*>(download.buffer_offset));
|
||||
tuple.format, tuple.type, 0);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
@ -390,11 +411,9 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
|
||||
if (driver.IsOpenGLES()) {
|
||||
const auto& downloader_es = runtime.GetDownloaderES();
|
||||
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
|
||||
rect_height, rect_width,
|
||||
reinterpret_cast<void*>(download.buffer_offset));
|
||||
rect_height, rect_width, 0);
|
||||
} else {
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(download.buffer_offset));
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -409,7 +428,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
|
||||
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
|
||||
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
|
||||
tuple.format, tuple.type, 0);
|
||||
|
||||
const auto scaled_rect = upload.texture_rect * res_scale;
|
||||
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
|
||||
|
@ -70,6 +70,10 @@ public:
|
||||
/// Returns the OpenGL format tuple associated with the provided pixel format
|
||||
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
|
||||
|
||||
/// Performs required format convertions on the staging data
|
||||
void FormatConvert(VideoCore::PixelFormat format, bool upload,
|
||||
std::span<std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/// Allocates an OpenGL texture with the specified dimentions and format
|
||||
OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type);
|
||||
@ -124,7 +128,7 @@ private:
|
||||
class Surface : public VideoCore::SurfaceBase<Surface> {
|
||||
public:
|
||||
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
|
||||
~Surface() override = default;
|
||||
~Surface() override;
|
||||
|
||||
/// Uploads pixel data in staging to a rectangle region of the surface texture
|
||||
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
|
||||
@ -148,8 +152,8 @@ public:
|
||||
};
|
||||
|
||||
struct Traits {
|
||||
using Runtime = TextureRuntime;
|
||||
using Surface = Surface;
|
||||
using RuntimeType = TextureRuntime;
|
||||
using SurfaceType = Surface;
|
||||
};
|
||||
|
||||
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "core/memory.h"
|
||||
#include "core/tracer/recorder.h"
|
||||
#include "video_core/debug_utils/debug_utils.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
@ -383,6 +382,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
|
||||
return VideoCore::ResultStatus::Success;
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface* RendererOpenGL::Rasterizer() {
|
||||
return rasterizer.get();
|
||||
}
|
||||
|
||||
/// Shutdown the renderer
|
||||
void RendererOpenGL::ShutDown() {}
|
||||
|
||||
@ -580,7 +583,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
|
||||
// only allows rows to have a memory alignement of 4.
|
||||
ASSERT(pixel_stride % 4 == 0);
|
||||
|
||||
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr,
|
||||
if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr,
|
||||
static_cast<u32>(pixel_stride), screen_info)) {
|
||||
// Reset the screen info's display texture to its own permanent texture
|
||||
screen_info.display_texture = screen_info.texture.resource.handle;
|
||||
@ -1214,4 +1217,8 @@ void RendererOpenGL::CleanupVideoDumping() {
|
||||
mailbox->free_cv.notify_one();
|
||||
}
|
||||
|
||||
void RendererOpenGL::Sync() {
|
||||
rasterizer->SyncEntireState();
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -55,18 +55,16 @@ struct PresentationTexture {
|
||||
OGLTexture texture;
|
||||
};
|
||||
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class RendererOpenGL : public RendererBase {
|
||||
public:
|
||||
explicit RendererOpenGL(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window);
|
||||
~RendererOpenGL() override;
|
||||
|
||||
/// Initialize the renderer
|
||||
VideoCore::ResultStatus Init() override;
|
||||
|
||||
/// Shutdown the renderer
|
||||
VideoCore::RasterizerInterface* Rasterizer() override;
|
||||
void ShutDown() override;
|
||||
|
||||
/// Finalizes rendering the guest frame
|
||||
void SwapBuffers() override;
|
||||
|
||||
/// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this
|
||||
@ -75,9 +73,8 @@ public:
|
||||
|
||||
/// Prepares for video dumping (e.g. create necessary buffers, etc)
|
||||
void PrepareVideoDumping() override;
|
||||
|
||||
/// Cleans up after video dumping is ended
|
||||
void CleanupVideoDumping() override;
|
||||
void Sync() override;
|
||||
|
||||
private:
|
||||
void InitOpenGLObjects();
|
||||
@ -108,6 +105,7 @@ private:
|
||||
private:
|
||||
Driver driver;
|
||||
OpenGLState state;
|
||||
std::unique_ptr<RasterizerOpenGL> rasterizer;
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLVertexArray vertex_array;
|
||||
|
278
src/video_core/renderer_vulkan/pica_to_vk.h
Normal file
278
src/video_core/renderer_vulkan/pica_to_vk.h
Normal file
@ -0,0 +1,278 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace PicaToVK {
|
||||
|
||||
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
|
||||
|
||||
struct FilterInfo {
|
||||
vk::Filter mag_filter, min_filter;
|
||||
vk::SamplerMipmapMode mip_mode;
|
||||
};
|
||||
|
||||
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
|
||||
constexpr std::array filter_table = {
|
||||
vk::Filter::eNearest,
|
||||
vk::Filter::eLinear
|
||||
};
|
||||
|
||||
constexpr std::array mipmap_table = {
|
||||
vk::SamplerMipmapMode::eNearest,
|
||||
vk::SamplerMipmapMode::eLinear
|
||||
};
|
||||
|
||||
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
|
||||
}
|
||||
|
||||
inline vk::Filter TextureFilterMode(TextureFilter mode) {
|
||||
switch (mode) {
|
||||
case TextureFilter::Linear:
|
||||
return vk::Filter::eLinear;
|
||||
case TextureFilter::Nearest:
|
||||
return vk::Filter::eNearest;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode);
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
return vk::Filter::eLinear;
|
||||
}
|
||||
|
||||
inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) {
|
||||
switch (mip) {
|
||||
case TextureFilter::Linear:
|
||||
return vk::SamplerMipmapMode::eLinear;
|
||||
case TextureFilter::Nearest:
|
||||
return vk::SamplerMipmapMode::eNearest;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip);
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
return vk::SamplerMipmapMode::eLinear;
|
||||
}
|
||||
|
||||
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
|
||||
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
|
||||
vk::SamplerAddressMode::eClampToEdge,
|
||||
vk::SamplerAddressMode::eClampToBorder,
|
||||
vk::SamplerAddressMode::eRepeat,
|
||||
vk::SamplerAddressMode::eMirroredRepeat,
|
||||
// TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
|
||||
// comments in enum WrapMode.
|
||||
vk::SamplerAddressMode::eClampToEdge,
|
||||
vk::SamplerAddressMode::eClampToBorder,
|
||||
vk::SamplerAddressMode::eRepeat,
|
||||
vk::SamplerAddressMode::eRepeat,
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(mode);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= wrap_mode_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", index);
|
||||
UNREACHABLE();
|
||||
|
||||
return vk::SamplerAddressMode::eClampToEdge;
|
||||
}
|
||||
|
||||
if (index > 3) {
|
||||
Core::System::GetInstance().TelemetrySession().AddField(
|
||||
Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode",
|
||||
static_cast<u32>(index));
|
||||
LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index);
|
||||
}
|
||||
|
||||
return wrap_mode_table[index];
|
||||
}
|
||||
|
||||
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
|
||||
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
|
||||
vk::BlendOp::eAdd,
|
||||
vk::BlendOp::eSubtract,
|
||||
vk::BlendOp::eReverseSubtract,
|
||||
vk::BlendOp::eMin,
|
||||
vk::BlendOp::eMax,
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(equation);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= blend_equation_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", index);
|
||||
|
||||
// This return value is hwtested, not just a stub
|
||||
return vk::BlendOp::eAdd;
|
||||
}
|
||||
|
||||
return blend_equation_table[index];
|
||||
}
|
||||
|
||||
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
|
||||
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
|
||||
vk::BlendFactor::eZero, // BlendFactor::Zero
|
||||
vk::BlendFactor::eOne, // BlendFactor::One
|
||||
vk::BlendFactor::eSrcColor, // BlendFactor::SourceColor
|
||||
vk::BlendFactor::eOneMinusSrcColor, // BlendFactor::OneMinusSourceColor
|
||||
vk::BlendFactor::eDstColor, // BlendFactor::DestColor
|
||||
vk::BlendFactor::eOneMinusDstColor, // BlendFactor::OneMinusDestColor
|
||||
vk::BlendFactor::eSrcAlpha, // BlendFactor::SourceAlpha
|
||||
vk::BlendFactor::eOneMinusSrcAlpha, // BlendFactor::OneMinusSourceAlpha
|
||||
vk::BlendFactor::eDstAlpha, // BlendFactor::DestAlpha
|
||||
vk::BlendFactor::eOneMinusDstAlpha, // BlendFactor::OneMinusDestAlpha
|
||||
vk::BlendFactor::eConstantColor, // BlendFactor::ConstantColor
|
||||
vk::BlendFactor::eOneMinusConstantColor,// BlendFactor::OneMinusConstantColor
|
||||
vk::BlendFactor::eConstantAlpha, // BlendFactor::ConstantAlpha
|
||||
vk::BlendFactor::eOneMinusConstantAlpha,// BlendFactor::OneMinusConstantAlpha
|
||||
vk::BlendFactor::eSrcAlphaSaturate, // BlendFactor::SourceAlphaSaturate
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(factor);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= blend_func_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", index);
|
||||
UNREACHABLE();
|
||||
|
||||
return vk::BlendFactor::eOne;
|
||||
}
|
||||
|
||||
return blend_func_table[index];
|
||||
}
|
||||
|
||||
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
|
||||
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
|
||||
vk::LogicOp::eClear, // Clear
|
||||
vk::LogicOp::eAnd, // And
|
||||
vk::LogicOp::eAndReverse, // AndReverse
|
||||
vk::LogicOp::eCopy, // Copy
|
||||
vk::LogicOp::eSet, // Set
|
||||
vk::LogicOp::eCopyInverted, // CopyInverted
|
||||
vk::LogicOp::eNoOp, // NoOp
|
||||
vk::LogicOp::eInvert, // Invert
|
||||
vk::LogicOp::eNand, // Nand
|
||||
vk::LogicOp::eOr, // Or
|
||||
vk::LogicOp::eNor, // Nor
|
||||
vk::LogicOp::eXor, // Xor
|
||||
vk::LogicOp::eEquivalent, // Equiv
|
||||
vk::LogicOp::eAndInverted, // AndInverted
|
||||
vk::LogicOp::eOrReverse, // OrReverse
|
||||
vk::LogicOp::eOrInverted, // OrInverted
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(op);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= logic_op_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", index);
|
||||
UNREACHABLE();
|
||||
|
||||
return vk::LogicOp::eCopy;
|
||||
}
|
||||
|
||||
return logic_op_table[index];
|
||||
}
|
||||
|
||||
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
|
||||
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
|
||||
vk::CompareOp::eNever, // CompareFunc::Never
|
||||
vk::CompareOp::eAlways, // CompareFunc::Always
|
||||
vk::CompareOp::eEqual, // CompareFunc::Equal
|
||||
vk::CompareOp::eNotEqual, // CompareFunc::NotEqual
|
||||
vk::CompareOp::eLess, // CompareFunc::LessThan
|
||||
vk::CompareOp::eLessOrEqual, // CompareFunc::LessThanOrEqual
|
||||
vk::CompareOp::eGreater, // CompareFunc::GreaterThan
|
||||
vk::CompareOp::eGreaterOrEqual, // CompareFunc::GreaterThanOrEqual
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(func);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= compare_func_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", index);
|
||||
UNREACHABLE();
|
||||
|
||||
return vk::CompareOp::eAlways;
|
||||
}
|
||||
|
||||
return compare_func_table[index];
|
||||
}
|
||||
|
||||
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
|
||||
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
|
||||
vk::StencilOp::eKeep, // StencilAction::Keep
|
||||
vk::StencilOp::eZero, // StencilAction::Zero
|
||||
vk::StencilOp::eReplace, // StencilAction::Replace
|
||||
vk::StencilOp::eIncrementAndClamp, // StencilAction::Increment
|
||||
vk::StencilOp::eDecrementAndClamp, // StencilAction::Decrement
|
||||
vk::StencilOp::eInvert, // StencilAction::Invert
|
||||
vk::StencilOp::eIncrementAndWrap, // StencilAction::IncrementWrap
|
||||
vk::StencilOp::eDecrementAndWrap, // StencilAction::DecrementWrap
|
||||
}};
|
||||
|
||||
const auto index = static_cast<std::size_t>(action);
|
||||
|
||||
// Range check table for input
|
||||
if (index >= stencil_op_table.size()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", index);
|
||||
UNREACHABLE();
|
||||
|
||||
return vk::StencilOp::eKeep;
|
||||
}
|
||||
|
||||
return stencil_op_table[index];
|
||||
}
|
||||
|
||||
inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) {
|
||||
switch (topology) {
|
||||
case Pica::PipelineRegs::TriangleTopology::Fan:
|
||||
return vk::PrimitiveTopology::eTriangleFan;
|
||||
case Pica::PipelineRegs::TriangleTopology::List:
|
||||
case Pica::PipelineRegs::TriangleTopology::Shader:
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Pica::PipelineRegs::TriangleTopology::Strip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
}
|
||||
}
|
||||
|
||||
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
|
||||
switch (mode) {
|
||||
case Pica::RasterizerRegs::CullMode::KeepAll:
|
||||
case Pica::RasterizerRegs::CullMode::KeepAll2:
|
||||
return vk::CullModeFlagBits::eNone;
|
||||
case Pica::RasterizerRegs::CullMode::KeepClockWise:
|
||||
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
|
||||
return vk::CullModeFlagBits::eBack;
|
||||
}
|
||||
}
|
||||
|
||||
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
|
||||
switch (mode) {
|
||||
case Pica::RasterizerRegs::CullMode::KeepAll:
|
||||
case Pica::RasterizerRegs::CullMode::KeepAll2:
|
||||
case Pica::RasterizerRegs::CullMode::KeepClockWise:
|
||||
return vk::FrontFace::eCounterClockwise;
|
||||
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
|
||||
return vk::FrontFace::eClockwise;
|
||||
}
|
||||
}
|
||||
|
||||
inline Common::Vec4f ColorRGBA8(const u32 color) {
|
||||
const auto rgba =
|
||||
Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF};
|
||||
return rgba / 255.0f;
|
||||
}
|
||||
|
||||
inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
|
||||
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
|
||||
}
|
||||
|
||||
} // namespace PicaToGL
|
1055
src/video_core/renderer_vulkan/renderer_vulkan.cpp
Normal file
1055
src/video_core/renderer_vulkan/renderer_vulkan.cpp
Normal file
File diff suppressed because it is too large
Load Diff
126
src/video_core/renderer_vulkan/renderer_vulkan.h
Normal file
126
src/video_core/renderer_vulkan/renderer_vulkan.h
Normal file
@ -0,0 +1,126 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <glm/glm.hpp>
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Structure used for storing information about the textures for each 3DS screen
|
||||
struct TextureInfo {
|
||||
ImageAlloc alloc;
|
||||
u32 width;
|
||||
u32 height;
|
||||
GPU::Regs::PixelFormat format;
|
||||
};
|
||||
|
||||
/// Structure used for storing information about the display target for each 3DS screen
|
||||
struct ScreenInfo {
|
||||
ImageAlloc* display_texture = nullptr;
|
||||
Common::Rectangle<float> display_texcoords;
|
||||
TextureInfo texture;
|
||||
vk::Sampler sampler;
|
||||
};
|
||||
|
||||
// Uniform data used for presenting the 3DS screens
|
||||
struct PresentUniformData {
|
||||
glm::mat4 modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
|
||||
// Returns an immutable byte view of the uniform data
|
||||
auto AsBytes() const {
|
||||
return std::as_bytes(std::span{this, 1});
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!");
|
||||
|
||||
constexpr u32 PRESENT_PIPELINES = 3;
|
||||
|
||||
class RasterizerVulkan;
|
||||
|
||||
class RendererVulkan : public RendererBase {
|
||||
public:
|
||||
RendererVulkan(Frontend::EmuWindow& window);
|
||||
~RendererVulkan() override;
|
||||
|
||||
VideoCore::ResultStatus Init() override;
|
||||
VideoCore::RasterizerInterface* Rasterizer() override;
|
||||
void ShutDown() override;
|
||||
void SwapBuffers() override;
|
||||
void TryPresent(int timeout_ms) override {}
|
||||
void PrepareVideoDumping() override {}
|
||||
void CleanupVideoDumping() override {}
|
||||
void Sync() override;
|
||||
|
||||
private:
|
||||
void ReloadSampler();
|
||||
void ReloadPipeline();
|
||||
void CompileShaders();
|
||||
void BuildLayouts();
|
||||
void BuildPipelines();
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void ConfigureRenderPipeline();
|
||||
void PrepareRendertarget();
|
||||
void BeginRendering();
|
||||
|
||||
void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped);
|
||||
void DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h);
|
||||
void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h);
|
||||
void DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);
|
||||
void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);
|
||||
|
||||
void UpdateFramerate();
|
||||
|
||||
/// Loads framebuffer from emulated memory into the display information structure
|
||||
void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
ScreenInfo& screen_info, bool right_eye);
|
||||
|
||||
private:
|
||||
Instance instance;
|
||||
TaskScheduler scheduler;
|
||||
RenderpassCache renderpass_cache;
|
||||
TextureRuntime runtime;
|
||||
Swapchain swapchain;
|
||||
std::unique_ptr<RasterizerVulkan> rasterizer;
|
||||
StreamBuffer vertex_buffer;
|
||||
|
||||
// Present pipelines (Normal, Anaglyph, Interlaced)
|
||||
vk::PipelineLayout present_pipeline_layout;
|
||||
vk::DescriptorSetLayout present_descriptor_layout;
|
||||
vk::DescriptorUpdateTemplate present_update_template;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
|
||||
std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
|
||||
std::array<vk::Sampler, 2> present_samplers;
|
||||
vk::ShaderModule present_vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
u32 current_sampler = 0;
|
||||
|
||||
/// Display information for top and bottom screens respectively
|
||||
std::array<ScreenInfo, 3> screen_infos{};
|
||||
PresentUniformData draw_info{};
|
||||
vk::ClearColorValue clear_color{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
9
src/video_core/renderer_vulkan/vk_common.cpp
Normal file
9
src/video_core/renderer_vulkan/vk_common.cpp
Normal file
@ -0,0 +1,9 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VMA_IMPLEMENTATION
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
// Store the dispatch loader here
|
||||
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
72
src/video_core/renderer_vulkan/vk_common.h
Normal file
72
src/video_core/renderer_vulkan/vk_common.h
Normal file
@ -0,0 +1,72 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
// Include vulkan-hpp header
|
||||
#define VK_NO_PROTOTYPES 1
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
// Include Vulkan memory allocator
|
||||
#define VMA_STATIC_VULKAN_FUNCTIONS 0
|
||||
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
|
||||
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
|
||||
|
||||
/// Return the image aspect associated on the provided format
|
||||
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
|
||||
switch (format) {
|
||||
case vk::Format::eD16UnormS8Uint:
|
||||
case vk::Format::eD24UnormS8Uint:
|
||||
case vk::Format::eX8D24UnormPack32:
|
||||
case vk::Format::eD32SfloatS8Uint:
|
||||
return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
case vk::Format::eD16Unorm:
|
||||
case vk::Format::eD32Sfloat:
|
||||
return vk::ImageAspectFlagBits::eDepth;
|
||||
break;
|
||||
default:
|
||||
return vk::ImageAspectFlagBits::eColor;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a bit mask with the required usage of a format with a particular aspect
|
||||
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::ImageUsageFlagBits::eSampled |
|
||||
vk::ImageUsageFlagBits::eTransferDst |
|
||||
vk::ImageUsageFlagBits::eTransferSrc;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::ImageUsageFlagBits::eStorage |
|
||||
vk::ImageUsageFlagBits::eColorAttachment;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a bit mask with the required features of a format with a particular aspect
|
||||
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
|
||||
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
|
||||
vk::FormatFeatureFlagBits::eTransferDst |
|
||||
vk::FormatFeatureFlagBits::eTransferSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitDst;
|
||||
|
||||
if (aspect & vk::ImageAspectFlagBits::eDepth) {
|
||||
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
|
||||
} else {
|
||||
return usage | vk::FormatFeatureFlagBits::eStorageImage |
|
||||
vk::FormatFeatureFlagBits::eColorAttachment;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
268
src/video_core/renderer_vulkan/vk_instance.cpp
Normal file
268
src/video_core/renderer_vulkan/vk_instance.cpp
Normal file
@ -0,0 +1,268 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <span>
|
||||
#include "common/assert.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
Instance::Instance(Frontend::EmuWindow& window) {
    auto window_info = window.GetWindowInfo();

    // Fetch instance independent function pointers
    vk::DynamicLoader dl;
    auto vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
    VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);

    // Enable the instance extensions the backend uses
    auto extensions = GetInstanceExtensions(window_info.type, true);

    // We require a Vulkan 1.1 driver. Abort instead of continuing with a
    // driver that cannot create the objects the backend depends on.
    const u32 available_version = vk::enumerateInstanceVersion();
    if (available_version < VK_API_VERSION_1_1) {
        LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
        UNREACHABLE();
    }

    const vk::ApplicationInfo application_info = {
        .pApplicationName = "Citra",
        .applicationVersion = VK_MAKE_VERSION(1, 0, 0),
        .pEngineName = "Citra Vulkan",
        .engineVersion = VK_MAKE_VERSION(1, 0, 0),
        .apiVersion = available_version
    };

    // NOTE(review): validation layers are enabled unconditionally here; this
    // should eventually be gated behind a debug setting.
    const std::array layers = {"VK_LAYER_KHRONOS_validation"};
    const vk::InstanceCreateInfo instance_info = {
        .pApplicationInfo = &application_info,
        .enabledLayerCount = static_cast<u32>(layers.size()),
        .ppEnabledLayerNames = layers.data(),
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data()
    };

    instance = vk::createInstance(instance_info);
    surface = CreateSurface(instance, window);

    // TODO: GPU select dialog
    auto physical_devices = instance.enumeratePhysicalDevices();
    if (physical_devices.empty()) {
        LOG_CRITICAL(Render_Vulkan, "No Vulkan physical devices found!");
        UNREACHABLE();
    }

    // Previously this indexed physical_devices[1], which is out of bounds on
    // single-GPU systems; default to the first reported device instead.
    physical_device = physical_devices[0];
    device_properties = physical_device.getProperties();

    CreateDevice();
}
|
||||
|
||||
Instance::~Instance() {
    // Teardown must follow reverse creation order: drain all GPU work first,
    // destroy the allocator before the device it was created against, and the
    // surface before the instance that owns it.
    device.waitIdle();
    vmaDestroyAllocator(allocator);
    device.destroy();
    instance.destroySurfaceKHR(surface);
    instance.destroy();
}
|
||||
|
||||
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
    // Cache format properties so we don't have to query the driver all the time.
    // NOTE(review): this is a function-local static shared by all Instance
    // objects and is unsynchronized; fine while there is a single renderer
    // instance/thread, but it should become a member if that ever changes.
    static std::unordered_map<vk::Format, vk::FormatProperties> supported;

    // try_emplace performs a single hash lookup instead of the previous
    // find-then-insert pair, and only queries the driver on a cache miss.
    const auto [it, inserted] = supported.try_emplace(format);
    if (inserted) {
        it->second = physical_device.getFormatProperties(format);
    }

    return (it->second.optimalTilingFeatures & usage) == usage;
}
|
||||
|
||||
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
    // Undefined has no meaningful alternative; return it unchanged.
    if (format == vk::Format::eUndefined) {
        return format;
    }

    vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
    if (IsFormatSupported(format, features)) {
        return format;
    }

    // Return the most supported alternative format preferably with the
    // same block size according to the Vulkan spec.
    // See 43.3. Required Format Support of the Vulkan spec
    // (The previous eUndefined case was unreachable dead code due to the
    // early return above and has been removed.)
    switch (format) {
    case vk::Format::eD24UnormS8Uint:
        return vk::Format::eD32SfloatS8Uint;
    case vk::Format::eX8D24UnormPack32:
        return vk::Format::eD32Sfloat;
    case vk::Format::eR5G5B5A1UnormPack16:
        return vk::Format::eA1R5G5B5UnormPack16;
    case vk::Format::eR8G8B8Unorm:
        return vk::Format::eR8G8B8A8Unorm;
    case vk::Format::eR4G4B4A4UnormPack16:
        // B4G4R4A4 is not guaranteed by the spec to support attachments
        return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
    default:
        LOG_WARNING(Render_Vulkan, "Format {} doesn't support attachments, falling back to RGBA8",
                    vk::to_string(format));
        return vk::Format::eR8G8B8A8Unorm;
    }
}
|
||||
|
||||
bool Instance::CreateDevice() {
|
||||
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
|
||||
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
|
||||
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>();
|
||||
|
||||
// Not having geometry shaders will cause issues with accelerated rendering.
|
||||
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
|
||||
if (!available.geometryShader) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
|
||||
}
|
||||
|
||||
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
|
||||
if (extension_list.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Helper lambda for adding extensions
|
||||
std::array<const char*, 6> enabled_extensions;
|
||||
u32 enabled_extension_count = 0;
|
||||
|
||||
auto AddExtension = [&](std::string_view name) -> bool {
|
||||
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
|
||||
return name.compare(prop.extensionName.data());
|
||||
});
|
||||
|
||||
if (result != extension_list.end()) {
|
||||
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
|
||||
enabled_extensions[enabled_extension_count++] = name.data();
|
||||
return true;
|
||||
}
|
||||
|
||||
LOG_WARNING(Render_Vulkan, "Extension {} unavailable.", name);
|
||||
return false;
|
||||
};
|
||||
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
|
||||
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
|
||||
// Search queue families for graphics and present queues
|
||||
auto family_properties = physical_device.getQueueFamilyProperties();
|
||||
if (family_properties.empty()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool graphics_queue_found = false;
|
||||
bool present_queue_found = false;
|
||||
for (std::size_t i = 0; i < family_properties.size(); i++) {
|
||||
// Check if queue supports graphics
|
||||
const u32 index = static_cast<u32>(i);
|
||||
if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
|
||||
graphics_queue_family_index = index;
|
||||
graphics_queue_found = true;
|
||||
|
||||
// If this queue also supports presentation we are finished
|
||||
if (physical_device.getSurfaceSupportKHR(static_cast<u32>(i), surface)) {
|
||||
present_queue_family_index = index;
|
||||
present_queue_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if queue supports presentation
|
||||
if (physical_device.getSurfaceSupportKHR(index, surface)) {
|
||||
present_queue_family_index = index;
|
||||
present_queue_found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!graphics_queue_found || !present_queue_found) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
|
||||
return false;
|
||||
}
|
||||
|
||||
static constexpr float queue_priorities[] = {1.0f};
|
||||
|
||||
const std::array queue_infos = {
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = graphics_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
},
|
||||
vk::DeviceQueueCreateInfo{
|
||||
.queueFamilyIndex = present_queue_family_index,
|
||||
.queueCount = 1,
|
||||
.pQueuePriorities = queue_priorities
|
||||
}
|
||||
};
|
||||
|
||||
const u32 queue_count = graphics_queue_family_index != present_queue_family_index ? 2u : 1u;
|
||||
const vk::StructureChain device_chain = {
|
||||
vk::DeviceCreateInfo{
|
||||
.queueCreateInfoCount = queue_count,
|
||||
.pQueueCreateInfos = queue_infos.data(),
|
||||
.enabledExtensionCount = enabled_extension_count,
|
||||
.ppEnabledExtensionNames = enabled_extensions.data(),
|
||||
},
|
||||
vk::PhysicalDeviceFeatures2{
|
||||
.features = {
|
||||
.robustBufferAccess = available.robustBufferAccess,
|
||||
.geometryShader = available.geometryShader,
|
||||
.dualSrcBlend = available.dualSrcBlend,
|
||||
.logicOp = available.logicOp,
|
||||
.depthClamp = available.depthClamp,
|
||||
.largePoints = available.largePoints,
|
||||
.samplerAnisotropy = available.samplerAnisotropy,
|
||||
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
|
||||
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = available.shaderClipDistance
|
||||
}
|
||||
},
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
|
||||
.depthClipControl = true
|
||||
},
|
||||
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
|
||||
feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>()
|
||||
};
|
||||
|
||||
// Create logical device
|
||||
device = physical_device.createDevice(device_chain.get());
|
||||
VULKAN_HPP_DEFAULT_DISPATCHER.init(device);
|
||||
|
||||
// Grab the graphics and present queues.
|
||||
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
|
||||
present_queue = device.getQueue(present_queue_family_index, 0);
|
||||
|
||||
CreateAllocator();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Instance::CreateAllocator() {
    // VMA fetches all remaining entry points itself through these two loader
    // functions, so only the proc-address getters need to be supplied.
    const VmaVulkanFunctions functions = {
        .vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
        .vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
    };

    const VmaAllocatorCreateInfo allocator_info = {
        .physicalDevice = physical_device,
        .device = device,
        .pVulkanFunctions = &functions,
        .instance = instance,
        // Must match the API version the instance was created with (>= 1.1).
        .vulkanApiVersion = VK_API_VERSION_1_1
    };

    // Allocator creation failing is unrecoverable for the renderer.
    if (VkResult result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) {
        LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result);
        UNREACHABLE();
    }
}
|
||||
|
||||
} // namespace Vulkan
|
129
src/video_core/renderer_vulkan/vk_instance.h
Normal file
129
src/video_core/renderer_vulkan/vk_instance.h
Normal file
@ -0,0 +1,129 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// The global Vulkan instance
|
||||
class Instance {
|
||||
public:
|
||||
Instance(Frontend::EmuWindow& window);
|
||||
~Instance();
|
||||
|
||||
/// Returns true when the format supports the provided feature flags
|
||||
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
|
||||
|
||||
/// Returns the most compatible format that supports the provided feature flags
|
||||
vk::Format GetFormatAlternative(vk::Format format) const;
|
||||
|
||||
/// Returns the Vulkan instance
|
||||
vk::Instance GetInstance() const {
|
||||
return instance;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan surface
|
||||
vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
/// Returns the current physical device
|
||||
vk::PhysicalDevice GetPhysicalDevice() const {
|
||||
return physical_device;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan device
|
||||
vk::Device GetDevice() const {
|
||||
return device;
|
||||
}
|
||||
|
||||
VmaAllocator GetAllocator() const {
|
||||
return allocator;
|
||||
}
|
||||
|
||||
/// Retrieve queue information
|
||||
u32 GetGraphicsQueueFamilyIndex() const {
|
||||
return graphics_queue_family_index;
|
||||
}
|
||||
|
||||
u32 GetPresentQueueFamilyIndex() const {
|
||||
return present_queue_family_index;
|
||||
}
|
||||
|
||||
vk::Queue GetGraphicsQueue() const {
|
||||
return graphics_queue;
|
||||
}
|
||||
|
||||
vk::Queue GetPresentQueue() const {
|
||||
return present_queue;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_timeline_semaphore is supported
|
||||
bool IsTimelineSemaphoreSupported() const {
|
||||
return timeline_semaphores;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_extended_dynamic_state is supported
|
||||
bool IsExtendedDynamicStateSupported() const {
|
||||
return extended_dynamic_state;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_push_descriptors is supported
|
||||
bool IsPushDescriptorsSupported() const {
|
||||
return push_descriptors;
|
||||
}
|
||||
|
||||
/// Returns the vendor ID of the physical device
|
||||
u32 GetVendorID() const {
|
||||
return device_properties.vendorID;
|
||||
}
|
||||
|
||||
/// Returns the device ID of the physical device
|
||||
u32 GetDeviceID() const {
|
||||
return device_properties.deviceID;
|
||||
}
|
||||
|
||||
/// Returns the pipeline cache unique identifier
|
||||
const auto GetPipelineCacheUUID() const {
|
||||
return device_properties.pipelineCacheUUID;
|
||||
}
|
||||
|
||||
/// Returns the minimum required alignment for uniforms
|
||||
vk::DeviceSize UniformMinAlignment() const {
|
||||
return device_properties.limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Creates the logical device opportunistically enabling extensions
|
||||
bool CreateDevice();
|
||||
|
||||
/// Creates the VMA allocator handle
|
||||
void CreateAllocator();
|
||||
|
||||
private:
|
||||
vk::Device device;
|
||||
vk::PhysicalDevice physical_device;
|
||||
vk::Instance instance;
|
||||
vk::SurfaceKHR surface;
|
||||
vk::PhysicalDeviceProperties device_properties;
|
||||
VmaAllocator allocator;
|
||||
vk::Queue present_queue;
|
||||
vk::Queue graphics_queue;
|
||||
u32 present_queue_family_index = 0;
|
||||
u32 graphics_queue_family_index = 0;
|
||||
|
||||
bool timeline_semaphores = false;
|
||||
bool extended_dynamic_state = false;
|
||||
bool push_descriptors = false;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
714
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
714
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
@ -0,0 +1,714 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <filesystem>
|
||||
#include "common/common_paths.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Describes the layout of one descriptor set: the descriptor type occupying
/// each binding slot and how many of the MAX_DESCRIPTORS slots are used.
struct Bindings {
    std::array<vk::DescriptorType, MAX_DESCRIPTORS> bindings;
    u32 binding_count;
};
|
||||
|
||||
constexpr u32 RASTERIZER_SET_COUNT = 4;
|
||||
constexpr static std::array RASTERIZER_SETS = {
|
||||
Bindings{
|
||||
// Utility set
|
||||
.bindings = {
|
||||
vk::DescriptorType::eUniformBuffer,
|
||||
vk::DescriptorType::eUniformBuffer,
|
||||
vk::DescriptorType::eUniformTexelBuffer,
|
||||
vk::DescriptorType::eUniformTexelBuffer,
|
||||
vk::DescriptorType::eUniformTexelBuffer
|
||||
},
|
||||
.binding_count = 5
|
||||
},
|
||||
Bindings{
|
||||
// Texture set
|
||||
.bindings = {
|
||||
vk::DescriptorType::eSampledImage,
|
||||
vk::DescriptorType::eSampledImage,
|
||||
vk::DescriptorType::eSampledImage,
|
||||
vk::DescriptorType::eSampledImage
|
||||
},
|
||||
.binding_count = 4
|
||||
},
|
||||
Bindings{
|
||||
// Sampler set
|
||||
.bindings = {
|
||||
vk::DescriptorType::eSampler,
|
||||
vk::DescriptorType::eSampler,
|
||||
vk::DescriptorType::eSampler,
|
||||
vk::DescriptorType::eSampler
|
||||
},
|
||||
.binding_count = 4
|
||||
},
|
||||
Bindings {
|
||||
// Shadow set
|
||||
.bindings = {
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage,
|
||||
vk::DescriptorType::eStorageImage
|
||||
},
|
||||
.binding_count = 7
|
||||
}
|
||||
};
|
||||
|
||||
/// Maps a descriptor type to the set of shader stages that may access it.
/// Unknown types log an error and yield empty stage flags.
constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
    switch (type) {
    case vk::DescriptorType::eSampler:
    case vk::DescriptorType::eSampledImage:
    case vk::DescriptorType::eUniformTexelBuffer:
    case vk::DescriptorType::eStorageImage:
        // Image/sampler resources are only read from fragment shaders.
        return vk::ShaderStageFlagBits::eFragment;
    case vk::DescriptorType::eUniformBuffer:
    case vk::DescriptorType::eUniformBufferDynamic:
        // Uniform data is visible to every stage the backend uses.
        return vk::ShaderStageFlagBits::eFragment |
               vk::ShaderStageFlagBits::eVertex |
               vk::ShaderStageFlagBits::eGeometry |
               vk::ShaderStageFlagBits::eCompute;
    default:
        LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
        return vk::ShaderStageFlags{};
    }
}
|
||||
|
||||
/// Returns the size in bytes of a vertex attribute (component size * count).
u32 AttribBytes(VertexAttribute attrib) {
    switch (attrib.type) {
    case AttribType::Float:
        return sizeof(float) * attrib.size;
    case AttribType::Int:
        return sizeof(u32) * attrib.size;
    case AttribType::Short:
        return sizeof(u16) * attrib.size;
    case AttribType::Byte:
    case AttribType::Ubyte:
        return sizeof(u8) * attrib.size;
    }

    // Previously control flowed off the end of the function for an
    // out-of-range enum value, which is undefined behavior.
    LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!");
    UNREACHABLE();
    return 0;
}
|
||||
|
||||
/// Translates a vertex attribute description to a Vulkan format.
/// Only float attributes are implemented; every other type (and an invalid
/// float size) deliberately falls through into the default branch below.
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
    switch (attrib.type) {
    case AttribType::Float:
        switch (attrib.size) {
        case 1: return vk::Format::eR32Sfloat;
        case 2: return vk::Format::eR32G32Sfloat;
        case 3: return vk::Format::eR32G32B32Sfloat;
        case 4: return vk::Format::eR32G32B32A32Sfloat;
        }
        // Intentional fallthrough for sizes outside 1-4.
    default:
        LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
        UNREACHABLE();
    }

    // Unreachable fallback to keep all control paths returning a value.
    return vk::Format::eR32Sfloat;
}
|
||||
|
||||
/// Maps a shader-stage slot index (0 = vertex, 1 = fragment, 2 = geometry)
/// to the corresponding Vulkan stage bit. Invalid indices log a critical
/// error and fall back to the vertex stage.
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
    static constexpr std::array stage_table = {
        vk::ShaderStageFlagBits::eVertex,
        vk::ShaderStageFlagBits::eFragment,
        vk::ShaderStageFlagBits::eGeometry,
    };

    if (index >= stage_table.size()) {
        LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
        UNREACHABLE();
        return vk::ShaderStageFlagBits::eVertex;
    }

    return stage_table[index];
}
|
||||
|
||||
PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache)
    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
    // Mark every descriptor set dirty so the first BindPipeline rebinds all state.
    descriptor_dirty.fill(true);

    LoadDiskCache();
    BuildLayout();
    // Fallback vertex shader used when no programmable vertex shader is bound.
    trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex,
                                    instance.GetDevice(), ShaderOptimization::Debug);
}
|
||||
|
||||
PipelineCache::~PipelineCache() {
    vk::Device device = instance.GetDevice();

    // Persist the pipeline cache to disk before tearing anything down.
    SaveDiskCache();

    device.destroyPipelineLayout(layout);
    device.destroyPipelineCache(pipeline_cache);
    device.destroyShaderModule(trivial_vertex_shader);
    for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
        device.destroyDescriptorSetLayout(descriptor_set_layouts[i]);
        device.destroyDescriptorUpdateTemplate(update_templates[i]);
    }

    // Destroy every cached shader module from each of the three shader caches.
    for (auto& [key, module] : programmable_vertex_shaders.shader_cache) {
        device.destroyShaderModule(module);
    }

    for (auto& [key, module] : fixed_geometry_shaders.shaders) {
        device.destroyShaderModule(module);
    }

    for (auto& [key, module] : fragment_shaders.shaders) {
        device.destroyShaderModule(module);
    }

    // Finally destroy every cached pipeline object.
    for (const auto& [hash, pipeline] : graphics_pipelines) {
        device.destroyPipeline(pipeline);
    }

    graphics_pipelines.clear();
}
|
||||
|
||||
/// Looks up (building on miss) and binds the graphics pipeline matching the
/// current shader set and the given fixed-function state.
void PipelineCache::BindPipeline(const PipelineInfo& info) {
    ApplyDynamic(info);

    // When texture downloads occur the runtime will flush the GPU and cause
    // a scheduler slot switch behind our back. This might invalidate any
    // cached descriptor sets/require pipeline rebinding.
    if (timestamp != scheduler.GetHostFenceCounter()) {
        MarkDirty();
    }

    // Combine the hashes of all active shader stages.
    u64 shader_hash = 0;
    for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
        shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
    }

    // With extended dynamic state, most rasterizer/depth-stencil state is set
    // dynamically and excluded from the hash; otherwise hash the prefix up to
    // the stencil_reference field. NOTE(review): this assumes the hashed
    // fields are laid out contiguously at the start of PipelineInfo —
    // confirm against the PipelineInfo declaration.
    const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ?
            offsetof(PipelineInfo, rasterization) :
            offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference);

    u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
    u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);

    // Build the pipeline on first use and cache it by its combined hash.
    auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
    if (new_pipeline) {
        it->second = BuildPipeline(info);
    }

    // Only issue a bind command when the pipeline actually changed.
    if (it->second != current_pipeline) {
        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
        current_pipeline = it->second;
    }

    BindDescriptorSets();
}
|
||||
|
||||
/// Translates and binds the guest's programmable vertex shader.
/// Returns false when the shader cannot be compiled to SPIR-V, in which case
/// the caller should fall back to UseTrivialVertexShader().
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) {
    const PicaVSConfig config{regs.vs, setup};
    auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
                                                            instance.GetDevice(), ShaderOptimization::Debug);
    if (!handle) {
        return false;
    }

    current_shaders[ProgramType::VS] = handle;
    shader_hashes[ProgramType::VS] = config.Hash();
    return true;
}
|
||||
|
||||
/// Binds the passthrough vertex shader compiled in the constructor.
/// Hash 0 is reserved to identify the trivial shader in the pipeline hash.
void PipelineCache::UseTrivialVertexShader() {
    current_shaders[ProgramType::VS] = trivial_vertex_shader;
    shader_hashes[ProgramType::VS] = 0;
}
|
||||
|
||||
/// Binds the fixed-function-emulating geometry shader generated from the
/// current Pica register state.
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
    const PicaFixedGSConfig gs_config{regs};
    auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
                                                  instance.GetDevice(), ShaderOptimization::Debug);
    current_shaders[ProgramType::GS] = handle;
    shader_hashes[ProgramType::GS] = gs_config.Hash();
}
|
||||
|
||||
/// Disables the geometry stage entirely (null module is skipped when the
/// pipeline's shader stages are assembled).
void PipelineCache::UseTrivialGeometryShader() {
    current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
    shader_hashes[ProgramType::GS] = 0;
}
|
||||
|
||||
/// Binds the fragment shader generated for the current Pica register state.
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
    auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
                                                 instance.GetDevice(), ShaderOptimization::Debug);
    current_shaders[ProgramType::FS] = handle;
    shader_hashes[ProgramType::FS] = config.Hash();
}
|
||||
|
||||
/// Binds a sampled image (shader read-only layout) to the texture set (set 1).
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
    SetBinding(1, binding, DescriptorData{
        .image_info = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
        }
    });
}
|
||||
|
||||
/// Binds a storage image (general layout) to the shadow set (set 3).
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
    SetBinding(3, binding, DescriptorData{
        .image_info = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral
        }
    });
}
|
||||
|
||||
/// Binds a uniform buffer range to the utility set (set 0).
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
    const DescriptorData data = {
        .buffer_info = vk::DescriptorBufferInfo{
            .buffer = buffer,
            .offset = offset,
            .range = size
        }
    };

    SetBinding(0, binding, data);
}
|
||||
|
||||
/// Binds a texel buffer view to the utility set (set 0).
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
    const DescriptorData data = {
        .buffer_view = buffer_view
    };

    SetBinding(0, binding, data);
}
|
||||
|
||||
/// Binds a sampler object to the sampler set (set 2).
void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
    const DescriptorData data = {
        .image_info = vk::DescriptorImageInfo{
            .sampler = sampler
        }
    };

    SetBinding(2, binding, data);
}
|
||||
|
||||
void PipelineCache::SetViewport(float x, float y, float width, float height) {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f});
|
||||
}
|
||||
|
||||
/// Sets the dynamic scissor rectangle on the current render command buffer.
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
    const vk::Rect2D scissor{{x, y}, {width, height}};
    scheduler.GetRenderCommandBuffer().setScissor(0, scissor);
}
|
||||
|
||||
/// Invalidates all cached binding state, forcing the next BindPipeline to
/// rebind the pipeline and re-upload every descriptor set.
void PipelineCache::MarkDirty() {
    descriptor_dirty.fill(true);
    current_pipeline = VK_NULL_HANDLE;
    // Record the fence counter so BindPipeline can detect the next flush.
    timestamp = scheduler.GetHostFenceCounter();
}
|
||||
|
||||
/// Applies state that is dynamic under VK_EXT_extended_dynamic_state; without
/// the extension that state is baked into the pipeline object instead.
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
    if (instance.IsExtendedDynamicStateSupported()) {
        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology));
    }
}
|
||||
|
||||
/// Stores descriptor data for (set, binding) and marks the set dirty only
/// when the data actually changed, avoiding redundant descriptor updates.
void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) {
    if (update_data[set][binding] != data) {
        update_data[set][binding] = data;
        descriptor_dirty[set] = true;
    }
}
|
||||
|
||||
/// Creates the descriptor set layouts, descriptor update templates and the
/// shared pipeline layout from the static RASTERIZER_SETS table.
void PipelineCache::BuildLayout() {
    // Scratch arrays reused for each set; only the first binding_count
    // entries are valid for a given set.
    std::array<vk::DescriptorSetLayoutBinding, MAX_DESCRIPTORS> set_bindings;
    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;

    vk::Device device = instance.GetDevice();
    for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
        const auto& set = RASTERIZER_SETS[i];
        for (u32 j = 0; j < set.binding_count; j++) {
            vk::DescriptorType type = set.bindings[j];
            set_bindings[j] = vk::DescriptorSetLayoutBinding{
                .binding = j,
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = ToVkStageFlags(type)
            };

            // Template entries read binding j's data from update_data at
            // offset j * sizeof(DescriptorData); stride is unused since
            // descriptorCount is 1.
            update_entries[j] = vk::DescriptorUpdateTemplateEntry{
                .dstBinding = j,
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = type,
                .offset = j * sizeof(DescriptorData),
                .stride = 0
            };
        }

        const vk::DescriptorSetLayoutCreateInfo layout_info = {
            .bindingCount = set.binding_count,
            .pBindings = set_bindings.data()
        };

        // Create descriptor set layout
        descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info);

        const vk::DescriptorUpdateTemplateCreateInfo template_info = {
            .descriptorUpdateEntryCount = set.binding_count,
            .pDescriptorUpdateEntries = update_entries.data(),
            .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
            .descriptorSetLayout = descriptor_set_layouts[i]
        };

        // Create descriptor set update template
        update_templates[i] = device.createDescriptorUpdateTemplate(template_info);
    }

    // A single pipeline layout is shared by every rasterizer pipeline.
    const vk::PipelineLayoutCreateInfo layout_info = {
        .setLayoutCount = RASTERIZER_SET_COUNT,
        .pSetLayouts = descriptor_set_layouts.data(),
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr
    };

    layout = device.createPipelineLayout(layout_info);
}
|
||||
|
||||
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
|
||||
vk::Device device = instance.GetDevice();
|
||||
|
||||
u32 shader_count = 0;
|
||||
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
|
||||
for (std::size_t i = 0; i < current_shaders.size(); i++) {
|
||||
vk::ShaderModule shader = current_shaders[i];
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
|
||||
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
|
||||
.stage = ToVkShaderStage(i),
|
||||
.module = shader,
|
||||
.pName = "main"
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and increasing
|
||||
* data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate.
|
||||
* Since one instance is all we render, the shader will always read the single attribute.
|
||||
*/
|
||||
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
|
||||
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
|
||||
const auto& binding = info.vertex_layout.bindings[i];
|
||||
bindings[i] = vk::VertexInputBindingDescription{
|
||||
.binding = binding.binding,
|
||||
.stride = binding.stride,
|
||||
.inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
|
||||
: vk::VertexInputRate::eVertex
|
||||
};
|
||||
}
|
||||
|
||||
// Populate vertex attribute structures
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
|
||||
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
|
||||
const auto& attr = info.vertex_layout.attributes[i];
|
||||
attributes[i] = vk::VertexInputAttributeDescription{
|
||||
.location = attr.location,
|
||||
.binding = attr.binding,
|
||||
.format = ToVkAttributeFormat(attr),
|
||||
.offset = attr.offset
|
||||
};
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
|
||||
.pVertexBindingDescriptions = bindings.data(),
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
|
||||
.pVertexAttributeDescriptions = attributes.data()
|
||||
};
|
||||
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
|
||||
.primitiveRestartEnable = false
|
||||
};
|
||||
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||
.depthClampEnable = false,
|
||||
.rasterizerDiscardEnable = false,
|
||||
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
|
||||
.frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
|
||||
.depthBiasEnable = false,
|
||||
.lineWidth = 1.0f
|
||||
};
|
||||
|
||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||
.rasterizationSamples = vk::SampleCountFlagBits::e1,
|
||||
.sampleShadingEnable = false
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
|
||||
.blendEnable = info.blending.blend_enable.Value(),
|
||||
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
|
||||
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
|
||||
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
|
||||
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
|
||||
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
|
||||
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
|
||||
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
|
||||
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = info.blending.logic_op_enable.Value(),
|
||||
.logicOp = PicaToVK::LogicOp(info.blending.logic_op),
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &colorblend_attachment,
|
||||
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
|
||||
};
|
||||
|
||||
const vk::Viewport viewport = {
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = 1.0f,
|
||||
.height = 1.0f,
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f
|
||||
};
|
||||
|
||||
const vk::Rect2D scissor = {
|
||||
.offset = {0, 0},
|
||||
.extent = {1, 1}
|
||||
};
|
||||
|
||||
vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {
|
||||
.negativeOneToOne = true
|
||||
};
|
||||
|
||||
const vk::PipelineViewportStateCreateInfo viewport_info = {
|
||||
.pNext = &depth_clip_control,
|
||||
.viewportCount = 1,
|
||||
.pViewports = &viewport,
|
||||
.scissorCount = 1,
|
||||
.pScissors = &scissor,
|
||||
};
|
||||
|
||||
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
|
||||
const std::array dynamic_states = {
|
||||
vk::DynamicState::eViewport,
|
||||
vk::DynamicState::eScissor,
|
||||
vk::DynamicState::eStencilCompareMask,
|
||||
vk::DynamicState::eStencilWriteMask,
|
||||
vk::DynamicState::eStencilReference,
|
||||
vk::DynamicState::eBlendConstants,
|
||||
// VK_EXT_extended_dynamic_state
|
||||
vk::DynamicState::eCullModeEXT,
|
||||
vk::DynamicState::eDepthCompareOpEXT,
|
||||
vk::DynamicState::eDepthTestEnableEXT,
|
||||
vk::DynamicState::eDepthWriteEnableEXT,
|
||||
vk::DynamicState::eFrontFaceEXT,
|
||||
vk::DynamicState::ePrimitiveTopologyEXT,
|
||||
vk::DynamicState::eStencilOpEXT,
|
||||
vk::DynamicState::eStencilTestEnableEXT,
|
||||
};
|
||||
|
||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||
.dynamicStateCount =
|
||||
extended_dynamic_states ? static_cast<u32>(dynamic_states.size()) : 6u,
|
||||
.pDynamicStates = dynamic_states.data()
|
||||
};
|
||||
|
||||
const vk::StencilOpState stencil_op_state = {
|
||||
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
|
||||
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
|
||||
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
|
||||
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)
|
||||
};
|
||||
|
||||
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
|
||||
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
|
||||
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
|
||||
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
|
||||
.depthBoundsTestEnable = false,
|
||||
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
|
||||
.front = stencil_op_state,
|
||||
.back = stencil_op_state
|
||||
};
|
||||
|
||||
const vk::GraphicsPipelineCreateInfo pipeline_info = {
|
||||
.stageCount = shader_count,
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = &vertex_input_info,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pViewportState = &viewport_info,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
.pDepthStencilState = &depth_info,
|
||||
.pColorBlendState = &color_blending,
|
||||
.pDynamicState = &dynamic_info,
|
||||
.layout = layout,
|
||||
.renderPass = renderpass_cache.GetRenderpass(info.color_attachment,
|
||||
info.depth_attachment, false)
|
||||
};
|
||||
|
||||
if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
|
||||
result.result == vk::Result::eSuccess) {
|
||||
return result.value;
|
||||
} else {
|
||||
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// The C++ wrapper must be layout-identical to the C struct — presumably because
// the descriptor update templates below consume these entries as raw bytes.
static_assert(sizeof(vk::DescriptorBufferInfo) == sizeof(VkDescriptorBufferInfo));
|
||||
|
||||
void PipelineCache::BindDescriptorSets() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
|
||||
if (descriptor_dirty[i] || !descriptor_sets[i]) {
|
||||
const vk::DescriptorSetAllocateInfo alloc_info = {
|
||||
.descriptorPool = scheduler.GetDescriptorPool(),
|
||||
.descriptorSetCount = 1,
|
||||
.pSetLayouts = &descriptor_set_layouts[i]
|
||||
};
|
||||
|
||||
vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
|
||||
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]);
|
||||
|
||||
descriptor_sets[i] = set;
|
||||
descriptor_dirty[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Bind the descriptor sets
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT,
|
||||
descriptor_sets.data(), 0, nullptr);
|
||||
}
|
||||
|
||||
void PipelineCache::LoadDiskCache() {
|
||||
if (!EnsureDirectories()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin";
|
||||
vk::PipelineCacheCreateInfo cache_info = {
|
||||
.initialDataSize = 0,
|
||||
.pInitialData = nullptr
|
||||
};
|
||||
|
||||
FileUtil::IOFile cache_file{cache_file_path, "r"};
|
||||
if (cache_file.IsOpen()) {
|
||||
LOG_INFO(Render_Vulkan, "Loading pipeline cache");
|
||||
|
||||
const u32 cache_file_size = cache_file.GetSize();
|
||||
auto cache_data = std::vector<u8>(cache_file_size);
|
||||
if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
|
||||
if (!IsCacheValid(cache_data.data(), cache_file_size)) {
|
||||
LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid");
|
||||
} else {
|
||||
cache_info.initialDataSize = cache_file_size;
|
||||
cache_info.pInitialData = cache_data.data();
|
||||
}
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
}
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
pipeline_cache = device.createPipelineCache(cache_info);
|
||||
}
|
||||
|
||||
void PipelineCache::SaveDiskCache() {
|
||||
if (!EnsureDirectories()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin";
|
||||
FileUtil::IOFile cache_file{cache_file_path, "wb"};
|
||||
if (!cache_file.IsOpen()) {
|
||||
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
auto cache_data = device.getPipelineCacheData(pipeline_cache);
|
||||
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
|
||||
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
|
||||
return;
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
}
|
||||
|
||||
bool PipelineCache::IsCacheValid(const u8* data, u32 size) const {
|
||||
if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
|
||||
return false;
|
||||
}
|
||||
|
||||
vk::PipelineCacheHeaderVersionOne header;
|
||||
std::memcpy(&header, data, sizeof(header));
|
||||
if (header.headerSize < sizeof(header)) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
|
||||
header.vendorID, vendor_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
|
||||
header.deviceID, device_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
|
||||
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PipelineCache::EnsureDirectories() const {
|
||||
const auto CreateDir = [](const std::string& dir) {
|
||||
if (!FileUtil::CreateDir(dir)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
|
||||
CreateDir(GetPipelineCacheDir());
|
||||
}
|
||||
|
||||
std::string PipelineCache::GetPipelineCacheDir() const {
|
||||
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan";
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
268
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
268
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
@ -0,0 +1,268 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/hash.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
#include "video_core/shader/shader_cache.h"
|
||||
#include "video_core/regs.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Upper bounds used to size the fixed arrays in PipelineInfo and PipelineCache.
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
constexpr u32 MAX_VERTEX_BINDINGS = 16;
constexpr u32 MAX_DESCRIPTORS = 8;
constexpr u32 MAX_DESCRIPTOR_SETS = 6;

// Component type of a vertex attribute (see VertexAttribute::type).
enum class AttribType : u32 {
    Float = 0,
    Int = 1,
    Short = 2,
    Byte = 3,
    Ubyte = 4
};
|
||||
|
||||
/**
 * The pipeline state is tightly packed with bitfields to reduce
 * the overhead of hashing as much as possible
 */
union RasterizationState {
    u8 value = 0;
    BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
    // Bits 2-3 are currently unused.
    BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
|
||||
|
||||
// Packed depth/stencil pipeline state. The anonymous union holds the fixed
// pipeline configuration; the loose bytes below it hold values that are
// applied as dynamic state instead.
struct DepthStencilState {
    union {
        u32 value = 0;
        BitField<0, 1, u32> depth_test_enable;
        BitField<1, 1, u32> depth_write_enable;
        BitField<2, 1, u32> stencil_test_enable;
        BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
        BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
        BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
        BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
        BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
    };

    // These are dynamic state so keep them separate
    u8 stencil_reference;
    u8 stencil_compare_mask;
    u8 stencil_write_mask;
};
|
||||
|
||||
// Packed color blending configuration mirroring the PICA framebuffer
// registers; consumed by BuildPipeline when filling the Vulkan blend state.
union BlendingState {
    u32 value = 0;
    BitField<0, 1, u32> blend_enable;
    BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
    BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
    BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
    BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
    BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
    BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
    BitField<23, 4, u32> color_write_mask;
    BitField<27, 1, u32> logic_op_enable;
    BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op;
};
|
||||
|
||||
// Packed description of one vertex buffer binding (kept to 16 bits so the
// containing VertexLayout stays cheap to hash).
union VertexBinding {
    u16 value = 0;
    BitField<0, 4, u16> binding;
    // Set when the binding carries a fixed (constant) attribute rather than
    // per-vertex data — presumably mirrors PICA fixed attributes; confirm at
    // the usage site.
    BitField<4, 1, u16> fixed;
    BitField<5, 11, u16> stride;
};
|
||||
|
||||
// Packed description of one vertex attribute: which binding it reads from,
// its shader location, component type/count and byte offset.
union VertexAttribute {
    u32 value = 0;
    BitField<0, 4, u32> binding;
    BitField<4, 4, u32> location;
    BitField<8, 3, AttribType> type;
    BitField<11, 3, u32> size;
    BitField<14, 11, u32> offset;
};
|
||||
|
||||
// Full vertex input layout: only the first binding_count/attribute_count
// entries of the fixed arrays are meaningful.
struct VertexLayout {
    u8 binding_count;
    u8 attribute_count;
    std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
    std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
|
||||
|
||||
/**
 * Information about a graphics/compute pipeline
 */
struct PipelineInfo {
    VertexLayout vertex_layout{};
    BlendingState blending{};
    VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8;
    VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
    RasterizationState rasterization{};
    DepthStencilState depth_stencil{};

    /// Returns true when this pipeline can modify its depth/stencil
    /// attachment: either depth testing with writes is enabled, or stencil
    /// testing with a non-zero write mask targets a format that actually has
    /// a stencil aspect (D24S8).
    bool IsDepthWriteEnabled() const {
        const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8;
        const bool depth_write =
            depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
        const bool stencil_write =
            has_stencil && depth_stencil.stencil_test_enable && depth_stencil.stencil_write_mask != 0;

        return depth_write || stencil_write;
    }
};
|
||||
|
||||
// Payload for a single descriptor binding; which member is active is implied
// by the descriptor type of the binding it is written to.
union DescriptorData {
    vk::DescriptorImageInfo image_info;
    vk::DescriptorBufferInfo buffer_info;
    vk::BufferView buffer_view;

    // NOTE(review): compares the raw bytes of the whole union, so bytes of
    // the inactive member (and any padding) participate — this relies on
    // instances being value/zero-initialized; confirm at the call sites.
    bool operator!=(const DescriptorData& other) const {
        return std::memcmp(this, &other, sizeof(DescriptorData)) != 0;
    }
};
|
||||
|
||||
/// Cached binding payloads for one descriptor set.
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;

/**
 * Vulkan specialized PICA shader caches
 */
using ProgrammableVertexShaders =
    Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, &Compile, &GenerateVertexShader>;

using FixedGeometryShaders =
    Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule, &Compile, &GenerateFixedGeometryShader>;

using FragmentShaders =
    Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;


class Instance;
class TaskScheduler;
class RenderpassCache;
|
||||
|
||||
/**
 * Stores a collection of rasterizer pipelines used during rendering.
 * In addition handles descriptor set management.
 */
class PipelineCache {
public:
    PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache);
    ~PipelineCache();

    /// Binds a pipeline using the provided information
    void BindPipeline(const PipelineInfo& info);

    /// Binds a PICA decompiled vertex shader
    bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup);

    /// Binds a passthrough vertex shader
    void UseTrivialVertexShader();

    /// Binds a PICA decompiled geometry shader
    void UseFixedGeometryShader(const Pica::Regs& regs);

    /// Binds a passthrough geometry shader
    void UseTrivialGeometryShader();

    /// Binds a fragment shader generated from PICA state
    void UseFragmentShader(const Pica::Regs& regs);

    /// Binds a texture to the specified binding
    void BindTexture(u32 binding, vk::ImageView image_view);

    /// Binds a storage image to the specified binding
    void BindStorageImage(u32 binding, vk::ImageView image_view);

    /// Binds a buffer to the specified binding
    void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);

    /// Binds a texel buffer to the specified binding
    void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);

    /// Binds a sampler to the specified binding
    void BindSampler(u32 binding, vk::Sampler sampler);

    /// Sets the viewport rectangle to the provided values
    void SetViewport(float x, float y, float width, float height);

    /// Sets the scissor rectangle to the provided values
    void SetScissor(s32 x, s32 y, u32 width, u32 height);

    /// Marks all cached pipeline cache state as dirty
    void MarkDirty();

private:
    /// Binds a resource to the provided binding
    void SetBinding(u32 set, u32 binding, DescriptorData data);

    /// Applies dynamic pipeline state to the current command buffer
    void ApplyDynamic(const PipelineInfo& info);

    /// Builds the rasterizer pipeline layout
    void BuildLayout();

    /// Builds a rasterizer pipeline using the PipelineInfo struct
    vk::Pipeline BuildPipeline(const PipelineInfo& info);

    /// Builds descriptor sets that reference the currently bound resources
    void BindDescriptorSets();

    /// Loads the pipeline cache stored to disk
    void LoadDiskCache();

    /// Stores the generated pipeline cache to disk
    void SaveDiskCache();

    /// Returns true when the disk data can be used by the current driver
    bool IsCacheValid(const u8* data, u32 size) const;

    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;

    /// Returns the pipeline cache storage dir
    std::string GetPipelineCacheDir() const;

private:
    const Instance& instance;
    TaskScheduler& scheduler;
    RenderpassCache& renderpass_cache;

    // Cached pipelines
    vk::PipelineCache pipeline_cache;
    std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
    vk::Pipeline current_pipeline{};

    // Cached layouts for the rasterizer pipelines
    vk::PipelineLayout layout;
    std::array<vk::DescriptorSetLayout, MAX_DESCRIPTOR_SETS> descriptor_set_layouts;
    std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;

    // Current data for the descriptor sets
    std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
    std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
    std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
    u64 timestamp = 0;

    // Bound shader modules
    // NOTE(review): the enumerators double as stage indices; the GS = 2 /
    // FS = 1 ordering looks deliberate but should be confirmed against the
    // stage-array usage in the .cpp.
    enum ProgramType : u32 {
        VS = 0,
        GS = 2,
        FS = 1
    };

    std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
    std::array<u64, MAX_SHADER_STAGES> shader_hashes;
    ProgrammableVertexShaders programmable_vertex_shaders;
    FixedGeometryShaders fixed_geometry_shaders;
    FragmentShaders fragment_shaders;
    vk::ShaderModule trivial_vertex_shader;
};
|
||||
|
||||
} // namespace Vulkan
|
131
src/video_core/renderer_vulkan/vk_platform.cpp
Normal file
131
src/video_core/renderer_vulkan/vk_platform.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Include the vulkan platform specific header
|
||||
#if defined(ANDROID) || defined (__ANDROID__)
|
||||
#define VK_USE_PLATFORM_ANDROID_KHR
|
||||
#elif defined(_WIN32)
|
||||
#define VK_USE_PLATFORM_WIN32_KHR
|
||||
#elif defined(__APPLE__)
|
||||
#define VK_USE_PLATFORM_MACOS_MVK
|
||||
#define VK_USE_PLATFORM_METAL_EXT
|
||||
#else
|
||||
#define VK_USE_PLATFORM_WAYLAND_KHR
|
||||
#define VK_USE_PLATFORM_XLIB_KHR
|
||||
#endif
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Creates a presentation surface for the emu window's native handle.
/// Returns a null surface (after logging) when the platform is unsupported.
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window) {
    const auto& window_info = emu_window.GetWindowInfo();
    vk::SurfaceKHR surface{};

    // Perform instance function loading here, to also load window system functions
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);

#if defined(VK_USE_PLATFORM_WIN32_KHR)
    if (window_info.type == Frontend::WindowSystemType::Windows) {
        const vk::Win32SurfaceCreateInfoKHR win32_ci = {
            // The spec requires a valid HINSTANCE here
            // (VUID-VkWin32SurfaceCreateInfoKHR-hinstance-01307); nullptr is
            // invalid usage even though some drivers tolerate it.
            .hinstance = GetModuleHandleW(nullptr),
            .hwnd = static_cast<HWND>(window_info.render_surface)
        };

        if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
            LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
            UNREACHABLE();
        }
    }
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
    if (window_info.type == Frontend::WindowSystemType::X11) {
        const vk::XlibSurfaceCreateInfoKHR xlib_ci = {
            .dpy = static_cast<Display*>(window_info.display_connection),
            .window = reinterpret_cast<Window>(window_info.render_surface)
        };

        if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
            UNREACHABLE();
        }
    }

    if (window_info.type == Frontend::WindowSystemType::Wayland) {
        const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
            .display = static_cast<wl_display*>(window_info.display_connection),
            .surface = static_cast<wl_surface*>(window_info.render_surface)
        };

        if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
            UNREACHABLE();
        }
    }
#endif

    if (!surface) {
        LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
    }

    return surface;
}
|
||||
|
||||
/// Returns the list of instance extensions needed for the given window
/// system (plus debug utils when requested); empty on query/support failure.
std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
    const auto properties = vk::enumerateInstanceExtensionProperties();
    if (properties.empty()) {
        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
        return std::vector<const char*>{};
    }

    // Pick the surface extension matching the frontend window system.
    std::vector<const char*> extensions;
    extensions.reserve(6);

    switch (window_type) {
    case Frontend::WindowSystemType::Headless:
        break;
#if defined(VK_USE_PLATFORM_WIN32_KHR)
    case Frontend::WindowSystemType::Windows:
        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
        break;
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
    case Frontend::WindowSystemType::X11:
        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
        break;
    case Frontend::WindowSystemType::Wayland:
        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
        break;
#endif
    default:
        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
        break;
    }

    if (window_type != Frontend::WindowSystemType::Headless) {
        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
    }

    if (enable_debug_utils) {
        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
    }

    // Verify the driver actually exposes everything we are about to request.
    for (const char* extension : extensions) {
        const bool supported = std::ranges::any_of(properties, [extension](const auto& prop) {
            return std::strcmp(extension, prop.extensionName) == 0;
        });

        if (!supported) {
            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
            return std::vector<const char*>{};
        }
    }

    return extensions;
}
|
||||
|
||||
} // namespace Vulkan
|
22
src/video_core/renderer_vulkan/vk_platform.h
Normal file
22
src/video_core/renderer_vulkan/vk_platform.h
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Frontend {
class EmuWindow;
enum class WindowSystemType : u8;
}

namespace Vulkan {

/// Returns the instance extensions required for the given window system,
/// plus VK_EXT_debug_utils when requested; empty on failure.
std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils);

/// Creates a presentation surface for the emu window's native window handle.
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window);

} // namespace Vulkan
|
2153
src/video_core/renderer_vulkan/vk_rasterizer.cpp
Normal file
2153
src/video_core/renderer_vulkan/vk_rasterizer.cpp
Normal file
File diff suppressed because it is too large
Load Diff
317
src/video_core/renderer_vulkan/vk_rasterizer.h
Normal file
317
src/video_core/renderer_vulkan/vk_rasterizer.h
Normal file
@ -0,0 +1,317 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
class RenderpassCache;
|
||||
|
||||
// Key describing a PICA sampler configuration; used to deduplicate Vulkan
// sampler objects (hashed via std::hash specialization below).
struct SamplerInfo {
    using TextureConfig = Pica::TexturingRegs::TextureConfig;
    TextureConfig::TextureFilter mag_filter;
    TextureConfig::TextureFilter min_filter;
    TextureConfig::TextureFilter mip_filter;
    TextureConfig::WrapMode wrap_s;
    TextureConfig::WrapMode wrap_t;
    u32 border_color = 0;
    u32 lod_min = 0;
    u32 lod_max = 0;
    s32 lod_bias = 0;

    // TODO(wwylele): remove this once mipmap for cube is implemented
    bool supress_mipmap_for_cube = false;

    auto operator<=>(const SamplerInfo&) const noexcept = default;
};
|
||||
|
||||
// Key describing a framebuffer configuration (attachments, renderpass and
// dimensions); used to deduplicate Vulkan framebuffer objects.
struct FramebufferInfo {
    vk::ImageView color;
    vk::ImageView depth;
    vk::RenderPass renderpass;
    u32 width = 1;
    u32 height = 1;

    auto operator<=>(const FramebufferInfo&) const noexcept = default;
};
|
||||
|
||||
}
|
||||
|
||||
namespace std {
// Hash specializations so SamplerInfo/FramebufferInfo can key unordered maps.
// NOTE(review): both hash the raw object bytes, so any struct padding
// participates — this relies on instances being zero/value-initialized;
// confirm at the construction sites.
template <>
struct hash<Vulkan::SamplerInfo> {
    std::size_t operator()(const Vulkan::SamplerInfo& info) const noexcept {
        return Common::ComputeHash64(&info, sizeof(Vulkan::SamplerInfo));
    }
};

template <>
struct hash<Vulkan::FramebufferInfo> {
    std::size_t operator()(const Vulkan::FramebufferInfo& info) const noexcept {
        return Common::ComputeHash64(&info, sizeof(Vulkan::FramebufferInfo));
    }
};
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
|
||||
friend class RendererVulkan;
|
||||
public:
|
||||
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler,
|
||||
TextureRuntime& runtime, RenderpassCache& renderpass_cache);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
void DrawTriangles() override;
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(PAddr addr, u32 size) override;
|
||||
void InvalidateRegion(PAddr addr, u32 size) override;
|
||||
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
|
||||
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
|
||||
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
|
||||
u32 pixel_stride, ScreenInfo& screen_info);
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
|
||||
/// Syncs entire status to match PICA registers
|
||||
void SyncEntireState() override;
|
||||
|
||||
/// Sync fixed function pipeline state
|
||||
void SyncFixedState();
|
||||
|
||||
/// Flushes all rasterizer owned buffers
|
||||
void FlushBuffers();
|
||||
|
||||
private:
|
||||
/// Syncs the clip enabled status to match the PICA register
|
||||
void SyncClipEnabled();
|
||||
|
||||
/// Syncs the clip coefficients to match the PICA register
|
||||
void SyncClipCoef();
|
||||
|
||||
/// Sets the OpenGL shader in accordance with the current PICA register state
|
||||
void SetShader();
|
||||
|
||||
/// Syncs the cull mode to match the PICA register
|
||||
void SyncCullMode();
|
||||
|
||||
/// Syncs the depth scale to match the PICA register
|
||||
void SyncDepthScale();
|
||||
|
||||
/// Syncs the depth offset to match the PICA register
|
||||
void SyncDepthOffset();
|
||||
|
||||
/// Syncs the blend enabled status to match the PICA register
|
||||
void SyncBlendEnabled();
|
||||
|
||||
/// Syncs the blend functions to match the PICA register
|
||||
void SyncBlendFuncs();
|
||||
|
||||
/// Syncs the blend color to match the PICA register
|
||||
void SyncBlendColor();
|
||||
|
||||
/// Syncs the fog states to match the PICA register
|
||||
void SyncFogColor();
|
||||
|
||||
/// Sync the procedural texture noise configuration to match the PICA register
|
||||
void SyncProcTexNoise();
|
||||
|
||||
/// Sync the procedural texture bias configuration to match the PICA register
|
||||
void SyncProcTexBias();
|
||||
|
||||
/// Syncs the alpha test states to match the PICA register
|
||||
void SyncAlphaTest();
|
||||
|
||||
/// Syncs the logic op states to match the PICA register
|
||||
void SyncLogicOp();
|
||||
|
||||
/// Syncs the color write mask to match the PICA register state
|
||||
void SyncColorWriteMask();
|
||||
|
||||
/// Syncs the stencil write mask to match the PICA register state
|
||||
void SyncStencilWriteMask();
|
||||
|
||||
/// Syncs the depth write mask to match the PICA register state
|
||||
void SyncDepthWriteMask();
|
||||
|
||||
/// Syncs the stencil test states to match the PICA register
|
||||
void SyncStencilTest();
|
||||
|
||||
/// Syncs the depth test states to match the PICA register
|
||||
void SyncDepthTest();
|
||||
|
||||
/// Syncs the TEV combiner color buffer to match the PICA register
|
||||
void SyncCombinerColor();
|
||||
|
||||
/// Syncs the TEV constant color to match the PICA register
|
||||
void SyncTevConstColor(std::size_t tev_index,
|
||||
const Pica::TexturingRegs::TevStageConfig& tev_stage);
|
||||
|
||||
/// Syncs the lighting global ambient color to match the PICA register
|
||||
void SyncGlobalAmbient();
|
||||
|
||||
/// Syncs the specified light's specular 0 color to match the PICA register
|
||||
void SyncLightSpecular0(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 1 color to match the PICA register
|
||||
void SyncLightSpecular1(int light_index);
|
||||
|
||||
/// Syncs the specified light's diffuse color to match the PICA register
|
||||
void SyncLightDiffuse(int light_index);
|
||||
|
||||
/// Syncs the specified light's ambient color to match the PICA register
|
||||
void SyncLightAmbient(int light_index);
|
||||
|
||||
/// Syncs the specified light's position to match the PICA register
|
||||
void SyncLightPosition(int light_index);
|
||||
|
||||
/// Syncs the specified spot light direcition to match the PICA register
|
||||
void SyncLightSpotDirection(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation bias to match the PICA register
|
||||
void SyncLightDistanceAttenuationBias(int light_index);
|
||||
|
||||
/// Syncs the specified light's distance attenuation scale to match the PICA register
|
||||
void SyncLightDistanceAttenuationScale(int light_index);
|
||||
|
||||
/// Syncs the shadow rendering bias to match the PICA register
|
||||
void SyncShadowBias();
|
||||
|
||||
/// Syncs the shadow texture bias to match the PICA register
|
||||
void SyncShadowTextureBias();
|
||||
|
||||
/// Syncs and uploads the lighting, fog and proctex LUTs
|
||||
void SyncAndUploadLUTs();
|
||||
void SyncAndUploadLUTsLF();
|
||||
|
||||
/// Upload the uniform blocks to the uniform buffer object
|
||||
void UploadUniforms(bool accelerate_draw);
|
||||
|
||||
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
|
||||
bool Draw(bool accelerate, bool is_indexed);
|
||||
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
||||
|
||||
/// Setup vertex shader for AccelerateDrawBatch
|
||||
bool SetupVertexShader();
|
||||
|
||||
/// Setup geometry shader for AccelerateDrawBatch
|
||||
bool SetupGeometryShader();
|
||||
|
||||
/// Creates a new sampler object
|
||||
vk::Sampler CreateSampler(const SamplerInfo& info);
|
||||
|
||||
/// Creates a new Vulkan framebuffer object
|
||||
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
TaskScheduler& scheduler;
|
||||
TextureRuntime& runtime;
|
||||
RenderpassCache& renderpass_cache;
|
||||
RasterizerCache res_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
bool shader_dirty = true;
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||
|
||||
constexpr static VertexLayout GetVertexLayout();
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
ImageAlloc default_texture;
|
||||
vk::Sampler default_sampler;
|
||||
|
||||
struct {
|
||||
Pica::Shader::UniformData data{};
|
||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
||||
bool lighting_lut_dirty_any = true;
|
||||
bool fog_lut_dirty = true;
|
||||
bool proctex_noise_lut_dirty = true;
|
||||
bool proctex_color_map_dirty = true;
|
||||
bool proctex_alpha_map_dirty = true;
|
||||
bool proctex_lut_dirty = true;
|
||||
bool proctex_diff_lut_dirty = true;
|
||||
bool dirty = true;
|
||||
} uniform_block_data = {};
|
||||
|
||||
std::array<bool, 16> hw_enabled_attributes{};
|
||||
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
SamplerInfo texture_cube_sampler;
|
||||
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
|
||||
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
|
||||
|
||||
StreamBuffer vertex_buffer;
|
||||
StreamBuffer uniform_buffer;
|
||||
StreamBuffer index_buffer;
|
||||
StreamBuffer texture_buffer;
|
||||
StreamBuffer texture_lf_buffer;
|
||||
PipelineInfo pipeline_info;
|
||||
std::size_t uniform_buffer_alignment;
|
||||
std::size_t uniform_size_aligned_vs;
|
||||
std::size_t uniform_size_aligned_fs;
|
||||
|
||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||
lighting_lut_data{};
|
||||
std::array<Common::Vec2f, 128> fog_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_color_map_data{};
|
||||
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_lut_data{};
|
||||
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
196
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
196
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
@ -0,0 +1,196 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Translates a PICA color surface format index to the matching Vulkan format.
/// Out-of-range indices map to vk::Format::eUndefined (no color attachment).
vk::Format ToVkFormatColor(u32 index) {
    if (index == 0) {
        return vk::Format::eR8G8B8A8Unorm;
    } else if (index == 1) {
        return vk::Format::eR8G8B8Unorm;
    } else if (index == 2) {
        return vk::Format::eR5G5B5A1UnormPack16;
    } else if (index == 3) {
        return vk::Format::eR5G6B5UnormPack16;
    } else if (index == 4) {
        return vk::Format::eR4G4B4A4UnormPack16;
    }
    return vk::Format::eUndefined;
}
|
||||
|
||||
/// Translates a PICA depth surface format index to the matching Vulkan format.
/// Index 2 is intentionally unmapped, mirroring the gap in PixelFormat;
/// unknown indices map to vk::Format::eUndefined (no depth attachment).
vk::Format ToVkFormatDepth(u32 index) {
    if (index == 0) {
        return vk::Format::eD16Unorm;
    } else if (index == 1) {
        return vk::Format::eX8D24UnormPack32;
    } else if (index == 3) {
        return vk::Format::eD24UnormS8Uint;
    }
    return vk::Format::eUndefined;
}
|
||||
|
||||
RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler)
    : instance{instance}, scheduler{scheduler} {
    // Pre-build every color/depth format combination the renderer can request.
    // The extra slot at the end of each axis stands for "no attachment".
    for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
        for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
            const vk::Format color_format =
                instance.GetFormatAlternative(ToVkFormatColor(color));
            const vk::Format depth_stencil_format =
                instance.GetFormatAlternative(ToVkFormatDepth(depth));

            // A renderpass with neither a color nor a depth attachment is useless
            if (color_format == vk::Format::eUndefined &&
                depth_stencil_format == vk::Format::eUndefined) {
                continue;
            }

            // Index 0 holds the load variant, index 1 the clear variant
            for (u32 variant = 0; variant < 2; variant++) {
                const vk::AttachmentLoadOp load_op =
                    variant ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad;
                cached_renderpasses[color][depth][variant] =
                    CreateRenderPass(color_format, depth_stencil_format, load_op,
                                     vk::ImageLayout::eColorAttachmentOptimal,
                                     vk::ImageLayout::eColorAttachmentOptimal);
            }
        }
    }
}
|
||||
|
||||
RenderpassCache::~RenderpassCache() {
    // Destroy every cached renderpass (both the load and clear variants),
    // then the swapchain present renderpass.
    vk::Device device = instance.GetDevice();
    for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
        for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
            for (u32 variant = 0; variant < 2; variant++) {
                if (vk::RenderPass renderpass = cached_renderpasses[color][depth][variant];
                    renderpass) {
                    device.destroyRenderPass(renderpass);
                }
            }
        }
    }

    device.destroyRenderPass(present_renderpass);
}
|
||||
|
||||
void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) {
    // Re-entering the renderpass that is already active is a no-op
    const vk::RenderPass target = begin_info.renderPass;
    if (target == active_renderpass) {
        return;
    }

    // End the previous renderpass instance (if any) before beginning the new one
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    if (active_renderpass) {
        command_buffer.endRenderPass();
    }

    command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline);
    active_renderpass = target;
}
|
||||
|
||||
void RenderpassCache::ExitRenderpass() {
|
||||
if (!active_renderpass) {
|
||||
return;
|
||||
}
|
||||
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
command_buffer.endRenderPass();
|
||||
active_renderpass = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
    // The present renderpass is created lazily, exactly once
    if (present_renderpass) {
        return;
    }

    present_renderpass =
        CreateRenderPass(format, vk::Format::eUndefined, vk::AttachmentLoadOp::eClear,
                         vk::ImageLayout::eUndefined, vk::ImageLayout::ePresentSrcKHR);
}
|
||||
|
||||
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
                                              bool is_clear) const {
    const bool has_color = color != VideoCore::PixelFormat::Invalid;
    const bool has_depth = depth != VideoCore::PixelFormat::Invalid;

    // Invalid formats select the sentinel "no attachment" slot at the end of each axis.
    // Depth formats are offset by 14 — presumably the enum value of the first depth
    // pixel format; confirm against VideoCore::PixelFormat when changing.
    const u32 color_index = has_color ? static_cast<u32>(color) : MAX_COLOR_FORMATS;
    const u32 depth_index = has_depth ? static_cast<u32>(depth) - 14 : MAX_DEPTH_FORMATS;

    ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS);
    return cached_renderpasses[color_index][depth_index][is_clear];
}
|
||||
|
||||
/**
 * Creates a Vulkan renderpass with at most one color and one depth-stencil attachment.
 * @param color Color attachment format; eUndefined omits the color attachment
 * @param depth Depth-stencil attachment format; eUndefined omits the depth attachment
 * @param load_op Load operation applied to the attachments (load or clear)
 * @param initial_layout Layout the color attachment is in when the pass begins
 * @param final_layout Layout the color attachment transitions to when the pass ends
 * @returns The newly created renderpass; the caller owns it and must destroy it
 */
vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
                                                 vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const {
    // Define attachments; each is only added when its format is defined
    u32 attachment_count = 0;
    std::array<vk::AttachmentDescription, 2> attachments;

    bool use_color = false;
    vk::AttachmentReference color_attachment_ref{};
    bool use_depth = false;
    vk::AttachmentReference depth_attachment_ref{};

    if (color != vk::Format::eUndefined) {
        attachments[attachment_count] = vk::AttachmentDescription{
            .format = color,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
            .stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
            .initialLayout = initial_layout,
            .finalLayout = final_layout
        };

        color_attachment_ref = vk::AttachmentReference{
            .attachment = attachment_count++,
            .layout = vk::ImageLayout::eColorAttachmentOptimal
        };

        use_color = true;
    }

    if (depth != vk::Format::eUndefined) {
        // The depth-stencil attachment always stays in the depth-stencil optimal
        // layout; the caller-provided layouts apply only to the color attachment
        attachments[attachment_count] = vk::AttachmentDescription{
            .format = depth,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eLoad,
            .stencilStoreOp = vk::AttachmentStoreOp::eStore,
            .initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal,
            .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
        };

        depth_attachment_ref = vk::AttachmentReference{
            .attachment = attachment_count++,
            .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
        };

        use_depth = true;
    }

    // We also require only one subpass. Note: pResolveAttachments is a pointer
    // member and must be nullptr (the original assigned the integer literal 0);
    // pColorAttachments is likewise nulled when there is no color attachment.
    const vk::SubpassDescription subpass = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = use_color ? 1u : 0u,
        .pColorAttachments = use_color ? &color_attachment_ref : nullptr,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr
    };

    const vk::RenderPassCreateInfo renderpass_info = {
        .attachmentCount = attachment_count,
        .pAttachments = attachments.data(),
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 0,
        .pDependencies = nullptr
    };

    // Create the renderpass
    vk::Device device = instance.GetDevice();
    return device.createRenderPass(renderpass_info);
}
|
||||
|
||||
} // namespace Vulkan
|
55
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
55
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
|
||||
constexpr u32 MAX_COLOR_FORMATS = 5;
|
||||
constexpr u32 MAX_DEPTH_FORMATS = 4;
|
||||
|
||||
/// Owns and caches the renderpasses used by the Vulkan rasterizer and tracks
/// which renderpass instance is currently begun on the render command buffer.
class RenderpassCache {
public:
    RenderpassCache(const Instance& instance, TaskScheduler& scheduler);
    ~RenderpassCache();

    /// Begins a new renderpass only when no other renderpass is currently active.
    /// NOTE(review): begin_info is taken by value; consider const& — confirm callers.
    void EnterRenderpass(const vk::RenderPassBeginInfo begin_info);

    /// Exits from any currently active renderpass instance
    void ExitRenderpass();

    /// Returns the renderpass associated with the color-depth format pair;
    /// PixelFormat::Invalid selects the "no attachment" variant on that axis
    [[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
                                 bool is_clear) const;

    /// Returns the swapchain clear renderpass
    [[nodiscard]] vk::RenderPass GetPresentRenderpass() const {
        return present_renderpass;
    }

    /// Creates the renderpass used when rendering to the swapchain (created once, lazily)
    void CreatePresentRenderpass(vk::Format format);

private:
    /// Creates a renderpass configured appropriately and stores it in cached_renderpasses
    vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
                                    vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const;

private:
    const Instance& instance;
    TaskScheduler& scheduler;

    // Renderpass currently begun on the scheduler's render command buffer, if any
    vk::RenderPass active_renderpass = VK_NULL_HANDLE;
    // Renderpass targeting the swapchain; see CreatePresentRenderpass
    vk::RenderPass present_renderpass{};
    // Indexed by [color][depth][is_clear]; the extra slot at the end of each
    // format axis represents "no attachment" (PixelFormat::Invalid)
    vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2];
};
|
||||
|
||||
} // namespace Vulkan
|
223
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
223
src/video_core/renderer_vulkan/vk_shader.cpp
Normal file
@ -0,0 +1,223 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/Include/ResourceLimits.h>
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Resource limits passed to glslang's parser (TShader::parse). glslang requires
/// a caller-supplied TBuiltInResource; NOTE(review): these values appear to match
/// glslang's sample defaults from its standalone tool — confirm when updating glslang.
constexpr TBuiltInResource DefaultTBuiltInResource = {
    .maxLights = 32,
    .maxClipPlanes = 6,
    .maxTextureUnits = 32,
    .maxTextureCoords = 32,
    .maxVertexAttribs = 64,
    .maxVertexUniformComponents = 4096,
    .maxVaryingFloats = 64,
    .maxVertexTextureImageUnits = 32,
    .maxCombinedTextureImageUnits = 80,
    .maxTextureImageUnits = 32,
    .maxFragmentUniformComponents = 4096,
    .maxDrawBuffers = 32,
    .maxVertexUniformVectors = 128,
    .maxVaryingVectors = 8,
    .maxFragmentUniformVectors = 16,
    .maxVertexOutputVectors = 16,
    .maxFragmentInputVectors = 15,
    .minProgramTexelOffset = -8,
    .maxProgramTexelOffset = 7,
    .maxClipDistances = 8,
    .maxComputeWorkGroupCountX = 65535,
    .maxComputeWorkGroupCountY = 65535,
    .maxComputeWorkGroupCountZ = 65535,
    .maxComputeWorkGroupSizeX = 1024,
    .maxComputeWorkGroupSizeY = 1024,
    .maxComputeWorkGroupSizeZ = 64,
    .maxComputeUniformComponents = 1024,
    .maxComputeTextureImageUnits = 16,
    .maxComputeImageUniforms = 8,
    .maxComputeAtomicCounters = 8,
    .maxComputeAtomicCounterBuffers = 1,
    .maxVaryingComponents = 60,
    .maxVertexOutputComponents = 64,
    .maxGeometryInputComponents = 64,
    .maxGeometryOutputComponents = 128,
    .maxFragmentInputComponents = 128,
    .maxImageUnits = 8,
    .maxCombinedImageUnitsAndFragmentOutputs = 8,
    .maxCombinedShaderOutputResources = 8,
    .maxImageSamples = 0,
    .maxVertexImageUniforms = 0,
    .maxTessControlImageUniforms = 0,
    .maxTessEvaluationImageUniforms = 0,
    .maxGeometryImageUniforms = 0,
    .maxFragmentImageUniforms = 8,
    .maxCombinedImageUniforms = 8,
    .maxGeometryTextureImageUnits = 16,
    .maxGeometryOutputVertices = 256,
    .maxGeometryTotalOutputComponents = 1024,
    .maxGeometryUniformComponents = 1024,
    .maxGeometryVaryingComponents = 64,
    .maxTessControlInputComponents = 128,
    .maxTessControlOutputComponents = 128,
    .maxTessControlTextureImageUnits = 16,
    .maxTessControlUniformComponents = 1024,
    .maxTessControlTotalOutputComponents = 4096,
    .maxTessEvaluationInputComponents = 128,
    .maxTessEvaluationOutputComponents = 128,
    .maxTessEvaluationTextureImageUnits = 16,
    .maxTessEvaluationUniformComponents = 1024,
    .maxTessPatchComponents = 120,
    .maxPatchVertices = 32,
    .maxTessGenLevel = 64,
    .maxViewports = 16,
    .maxVertexAtomicCounters = 0,
    .maxTessControlAtomicCounters = 0,
    .maxTessEvaluationAtomicCounters = 0,
    .maxGeometryAtomicCounters = 0,
    .maxFragmentAtomicCounters = 8,
    .maxCombinedAtomicCounters = 8,
    .maxAtomicCounterBindings = 1,
    .maxVertexAtomicCounterBuffers = 0,
    .maxTessControlAtomicCounterBuffers = 0,
    .maxTessEvaluationAtomicCounterBuffers = 0,
    .maxGeometryAtomicCounterBuffers = 0,
    .maxFragmentAtomicCounterBuffers = 1,
    .maxCombinedAtomicCounterBuffers = 1,
    .maxAtomicCounterBufferSize = 16384,
    .maxTransformFeedbackBuffers = 4,
    .maxTransformFeedbackInterleavedComponents = 64,
    .maxCullDistances = 8,
    .maxCombinedClipAndCullDistances = 8,
    .maxSamples = 4,
    .maxMeshOutputVerticesNV = 256,
    .maxMeshOutputPrimitivesNV = 512,
    .maxMeshWorkGroupSizeX_NV = 32,
    .maxMeshWorkGroupSizeY_NV = 1,
    .maxMeshWorkGroupSizeZ_NV = 1,
    .maxTaskWorkGroupSizeX_NV = 32,
    .maxTaskWorkGroupSizeY_NV = 1,
    .maxTaskWorkGroupSizeZ_NV = 1,
    .maxMeshViewCountNV = 4,
    .maxDualSourceDrawBuffersEXT = 1,
    .limits = TLimits{
        .nonInductiveForLoops = 1,
        .whileLoops = 1,
        .doWhileLoops = 1,
        .generalUniformIndexing = 1,
        .generalAttributeMatrixVectorIndexing = 1,
        .generalVaryingIndexing = 1,
        .generalSamplerIndexing = 1,
        .generalVariableIndexing = 1,
        .generalConstantMatrixVectorIndexing = 1,
    }
};
|
||||
|
||||
/// Translates a Vulkan shader stage flag to the equivalent glslang stage enum.
/// Unsupported stages hit UNREACHABLE and fall back to the vertex stage.
EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
    switch (stage) {
    case vk::ShaderStageFlagBits::eVertex:
        return EShLanguage::EShLangVertex;
    case vk::ShaderStageFlagBits::eGeometry:
        return EShLanguage::EShLangGeometry;
    case vk::ShaderStageFlagBits::eFragment:
        return EShLanguage::EShLangFragment;
    case vk::ShaderStageFlagBits::eCompute:
        return EShLanguage::EShLangCompute;
    default:
        // Fixed typo in the log message ("Unkown" -> "Unknown")
        LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
        UNREACHABLE();
    }

    return EShLanguage::EShLangVertex;
}
|
||||
|
||||
bool InitializeCompiler() {
|
||||
static bool glslang_initialized = false;
|
||||
|
||||
if (glslang_initialized) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!glslang::InitializeProcess()) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::atexit([]() { glslang::FinalizeProcess(); });
|
||||
|
||||
glslang_initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Compiles GLSL source code into a Vulkan shader module.
 * @param code GLSL source for a single shader stage
 * @param stage The pipeline stage the source targets
 * @param device Device used to create the resulting shader module
 * @param level Debug keeps source/debug info and disables the optimizer
 * @returns The created shader module, or VK_NULL_HANDLE on compile/link failure
 */
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
                         ShaderOptimization level) {
    if (!InitializeCompiler()) {
        return VK_NULL_HANDLE;
    }

    // Parse as core-profile GLSL 450 under Vulkan + SPIR-V rules
    EProfile profile = ECoreProfile;
    EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
    EShLanguage lang = ToEshShaderStage(stage);

    int default_version = 450;
    const char* pass_source_code = code.data();
    int pass_source_code_length = static_cast<int>(code.size());

    // Target SPIR-V 1.3 (NOTE(review): no setEnvInput/setEnvClient call — glslang
    // defaults are relied upon here; confirm this is intentional)
    auto shader = std::make_unique<glslang::TShader>(lang);
    shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
    shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);

    // #include directives are not supported in generated shaders
    glslang::TShader::ForbidIncluder includer;
    if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
        LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
        return VK_NULL_HANDLE;
    }

    // Even though there's only a single shader, we still need to link it to generate SPV
    auto program = std::make_unique<glslang::TProgram>();
    program->addShader(shader.get());
    if (!program->link(messages)) {
        LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
        return VK_NULL_HANDLE;
    }

    glslang::TIntermediate* intermediate = program->getIntermediate(lang);
    std::vector<u32> out_code;
    spv::SpvBuildLogger logger;
    glslang::SpvOptions options;

    // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
    if (level == ShaderOptimization::Debug) {
        // Embed the GLSL source so debuggers can show it alongside the SPIR-V
        intermediate->addSourceText(pass_source_code, pass_source_code_length);
        options.generateDebugInfo = true;
        options.disableOptimizer = true;
        options.optimizeSize = false;
        options.disassemble = false;
        options.validate = true;
    } else {
        options.disableOptimizer = false;
        options.stripDebugInfo = true;
    }

    glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);

    // Conversion messages are informational, not fatal
    const std::string spv_messages = logger.getAllMessages();
    if (!spv_messages.empty()) {
        LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
    }

    // codeSize is in bytes, while out_code holds 32-bit words
    const vk::ShaderModuleCreateInfo shader_info = {
        .codeSize = out_code.size() * sizeof(u32),
        .pCode = out_code.data()
    };

    return device.createShaderModule(shader_info);
}
|
||||
|
||||
} // namespace Vulkan
|
19
src/video_core/renderer_vulkan/vk_shader.h
Normal file
19
src/video_core/renderer_vulkan/vk_shader.h
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Controls how Compile() drives the glslang-to-SPIR-V conversion.
enum class ShaderOptimization {
    High = 0,  // optimizer enabled, debug info stripped
    Debug = 1  // optimizer disabled, source/debug info embedded for RenderDoc
};

/// Compiles GLSL source for the given stage into a Vulkan shader module;
/// returns VK_NULL_HANDLE on compile or link failure.
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage,
                         vk::Device device, ShaderOptimization level);
|
||||
|
||||
} // namespace Vulkan
|
1758
src/video_core/renderer_vulkan/vk_shader_gen.cpp
Normal file
1758
src/video_core/renderer_vulkan/vk_shader_gen.cpp
Normal file
File diff suppressed because it is too large
Load Diff
247
src/video_core/renderer_vulkan/vk_shader_gen.h
Normal file
247
src/video_core/renderer_vulkan/vk_shader_gen.h
Normal file
@ -0,0 +1,247 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Vertex attribute slot indices shared by the generated shaders.
enum Attributes {
    ATTRIBUTE_POSITION,
    ATTRIBUTE_COLOR,
    ATTRIBUTE_TEXCOORD0,
    ATTRIBUTE_TEXCOORD1,
    ATTRIBUTE_TEXCOORD2,
    ATTRIBUTE_TEXCOORD0_W,
    ATTRIBUTE_NORMQUAT,
    ATTRIBUTE_VIEW,
};
|
||||
|
||||
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
    u32 sources_raw;
    u32 modifiers_raw;
    u32 ops_raw;
    u32 scales_raw;
    // Expands the packed words into a full TevStageConfig; const_color is zeroed
    // since it is deliberately excluded from the shader cache key.
    explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
        Pica::TexturingRegs::TevStageConfig stage;
        stage.sources_raw = sources_raw;
        stage.modifiers_raw = modifiers_raw;
        stage.ops_raw = ops_raw;
        stage.const_color = 0;
        stage.scales_raw = scales_raw;
        return stage;
    }
};
|
||||
|
||||
/// Plain-data snapshot of the PICA register state that affects fragment shader
/// generation; hashed as-is via PicaFSConfig, so every field must be part of
/// the cache key.
struct PicaFSConfigState {
    // Alpha/scissor test and texture unit 0 configuration
    Pica::FramebufferRegs::CompareFunc alpha_test_func;
    Pica::RasterizerRegs::ScissorMode scissor_test_mode;
    Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
    bool texture2_use_coord1;
    // Per-stage TEV configuration (const_color excluded, see TevStageConfigRaw)
    std::array<TevStageConfigRaw, 6> tev_stages;
    // Bitfield: low nibble = color buffer updates, high nibble = alpha buffer updates
    u8 combiner_buffer_input;

    Pica::RasterizerRegs::DepthBuffering depthmap_enable;
    Pica::TexturingRegs::FogMode fog_mode;
    bool fog_flip;
    bool alphablend_enable;
    Pica::FramebufferRegs::LogicOp logic_op;

    // Hardware lighting state
    struct {
        // Per-light configuration for the 8 PICA lights
        struct {
            unsigned num;
            bool directional;
            bool two_sided_diffuse;
            bool dist_atten_enable;
            bool spot_atten_enable;
            bool geometric_factor_0;
            bool geometric_factor_1;
            bool shadow_enable;
        } light[8];

        bool enable;
        unsigned src_num;
        Pica::LightingRegs::LightingBumpMode bump_mode;
        unsigned bump_selector;
        bool bump_renorm;
        bool clamp_highlights;

        Pica::LightingRegs::LightingConfig config;
        bool enable_primary_alpha;
        bool enable_secondary_alpha;

        bool enable_shadow;
        bool shadow_primary;
        bool shadow_secondary;
        bool shadow_invert;
        bool shadow_alpha;
        unsigned shadow_selector;

        // Lighting lookup-table sampling configuration, one entry per LUT
        struct {
            bool enable;
            bool abs_input;
            Pica::LightingRegs::LightingLutInput type;
            float scale;
        } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
    } lighting;

    // Procedural texture state
    struct {
        bool enable;
        u32 coord;
        Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
        Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
        bool separate_alpha;
        bool noise_enable;
        Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
        u32 lut_width;
        u32 lut_offset0;
        u32 lut_offset1;
        u32 lut_offset2;
        u32 lut_offset3;
        u32 lod_min;
        u32 lod_max;
        Pica::TexturingRegs::ProcTexFilter lut_filter;
    } proctex;

    bool shadow_rendering;
    bool shadow_texture_orthographic;
};
|
||||
|
||||
/**
 * This struct contains all state used to generate the GLSL fragment shader that emulates the
 * current Pica register configuration. This struct is used as a cache key for generated GLSL shader
 * programs. The functions in vk_shader_gen.cpp should retrieve state from this struct only, not by
 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
 * two separate shaders sharing the same key.
 */
|
||||
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
    /// Construct a PicaFSConfig with the given Pica register configuration.
    static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);

    /// True when the given TEV stage writes the combiner color buffer.
    /// Only the first four stages may update it; the low nibble of
    /// combiner_buffer_input carries one enable bit per stage.
    bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        const unsigned color_bits = state.combiner_buffer_input & 0xF;
        return (color_bits >> stage_index) & 1;
    }

    /// True when the given TEV stage writes the combiner alpha buffer;
    /// the high nibble of combiner_buffer_input carries the alpha enable bits.
    bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        const unsigned alpha_bits = state.combiner_buffer_input >> 4;
        return (alpha_bits >> stage_index) & 1;
    }
};
|
||||
|
||||
/**
 * This struct contains common information to identify a vertex/geometry shader generated from a
 * PICA vertex/geometry shader.
 */
|
||||
/// Shared cache-key fields for shaders generated from a PICA shader program.
struct PicaShaderConfigCommon {
    // Captures the program/swizzle hashes and output layout from the given registers
    void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);

    u64 program_hash;   // hash of the shader program code
    u64 swizzle_hash;   // hash of the swizzle data
    u32 main_offset;    // entry point offset within the program
    bool sanitize_mul;  // whether multiplications need PICA-accurate NaN handling

    u32 num_outputs;

    // output_map[output register index] -> output attribute index
    std::array<u32, 16> output_map;
};
|
||||
|
||||
/**
 * This struct contains information to identify a vertex shader generated from a PICA vertex
 * shader.
 */
|
||||
/// Hashable cache key for a generated vertex shader.
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
    // Build the key directly from the current register/setup state
    explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
        state.Init(regs, setup);
    }
    // Wrap an already-initialized common config
    explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
        state = conf;
    }
};
|
||||
|
||||
/// Cache-key fields for the fixed-function (non-GS) geometry shader path.
struct PicaGSConfigCommonRaw {
    // Captures output attribute counts and semantic mappings from the registers
    void Init(const Pica::Regs& regs);

    u32 vs_output_attributes;
    u32 gs_output_attributes;

    struct SemanticMap {
        u32 attribute_index;
        u32 component_index;
    };

    // semantic_maps[semantic name] -> GS output attribute index + component index
    std::array<SemanticMap, 24> semantic_maps;
};
|
||||
|
||||
/**
 * This struct contains information to identify a geometry shader generated from the non-GS PICA
 * pipeline.
 */
|
||||
/// Hashable cache key for the fixed-function geometry shader.
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
    explicit PicaFixedGSConfig(const Pica::Regs& regs) {
        state.Init(regs);
    }
};
|
||||
|
||||
/**
 * Generates the GLSL vertex shader program source code that accepts vertices from software shader
 * and directly passes them to the fragment shader.
 * @returns String of the shader source code
 */
|
||||
std::string GenerateTrivialVertexShader();
|
||||
|
||||
/**
 * Generates the GLSL vertex shader program source code for the given VS program
 * @returns String of the shader source code; std::nullopt on failure
 */
|
||||
std::optional<std::string> GenerateVertexShader(
|
||||
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config);
|
||||
|
||||
/**
|
||||
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
|
||||
* @returns String of the shader source code
|
||||
*/
|
||||
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config);
|
||||
|
||||
/**
 * Generates the GLSL fragment shader program source code for the current Pica state
 * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
 * configuration (NOTE: Use state in this struct only, not the Pica registers!)
 * @returns String of the shader source code
 */
|
||||
std::string GenerateFragmentShader(const PicaFSConfig& config);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
// std::hash specializations so the shader config structs can be used as keys in
// unordered containers; each forwards to Common::HashableStruct::Hash().
template <>
struct hash<Vulkan::PicaFSConfig> {
    std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<Vulkan::PicaVSConfig> {
    std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<Vulkan::PicaFixedGSConfig> {
    std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std
|
194
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
194
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
@ -0,0 +1,194 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/**
 * Maps a stream buffer usage to the destination access and pipeline stage
 * flags used in the barrier emitted after the staging -> GPU copy in
 * StreamBuffer::Flush.
 * @param usage Usage bit the stream buffer was created with
 * @returns Pair of (dstAccessMask, dstStageMask) for the barrier
 */
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
    std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
    switch (usage) {
    case vk::BufferUsageFlagBits::eVertexBuffer:
        result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
                                vk::PipelineStageFlagBits::eVertexInput);
        break;
    case vk::BufferUsageFlagBits::eIndexBuffer:
        result = std::make_pair(vk::AccessFlagBits::eIndexRead,
                                vk::PipelineStageFlagBits::eVertexInput);
        // Fix: this case previously fell through to the uniform buffer case,
        // clobbering the index buffer result.
        break;
    case vk::BufferUsageFlagBits::eUniformBuffer:
        result = std::make_pair(vk::AccessFlagBits::eUniformRead,
                                vk::PipelineStageFlagBits::eVertexShader |
                                vk::PipelineStageFlagBits::eGeometryShader |
                                vk::PipelineStageFlagBits::eFragmentShader);
        // Fix: this case previously fell through to the texel buffer case.
        break;
    case vk::BufferUsageFlagBits::eUniformTexelBuffer:
        result = std::make_pair(vk::AccessFlagBits::eShaderRead,
                                vk::PipelineStageFlagBits::eFragmentShader);
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
    }

    return result;
}
|
||||
|
||||
// Creates a host-visible staging buffer of `size` bytes with the given usage.
// The buffer is persistently mapped (VMA_ALLOCATION_CREATE_MAPPED_BIT) so
// `mapped` stays valid for the lifetime of the object.
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
    : instance{instance} {
    const vk::BufferCreateInfo buffer_info = {
        .size = size,
        .usage = usage
    };

    // Sequential-write host access + preference for host memory: this buffer
    // is only ever written linearly by the CPU and read by transfer commands.
    const VmaAllocationCreateInfo alloc_create_info = {
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                 VMA_ALLOCATION_CREATE_MAPPED_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST
    };

    // VMA works with the raw C handles, so convert from vulkan-hpp types.
    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
    VmaAllocationInfo alloc_info;
    VmaAllocator allocator = instance.GetAllocator();

    // NOTE(review): the VkResult of vmaCreateBuffer is not checked here;
    // a failed allocation would leave `buffer` null. Consider asserting.
    vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
                    &unsafe_buffer, &allocation, &alloc_info);

    buffer = vk::Buffer{unsafe_buffer};
    mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
}
|
||||
|
||||
// Releases the buffer and its VMA allocation (also unmaps the memory,
// since the allocation was created persistently mapped).
StagingBuffer::~StagingBuffer() {
    vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
|
||||
|
||||
/**
 * Creates a device-local stream buffer backed by a host-visible staging
 * buffer of the same size. The buffer is partitioned into one bucket per
 * scheduler command slot so in-flight GPU reads are never overwritten.
 * @param size Total size in bytes (split evenly across command slots)
 * @param usage Primary usage of the GPU buffer (transfer-dst is added)
 * @param view_formats Formats for optional texel buffer views (up to
 *        MAX_BUFFER_VIEWS entries)
 */
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
                           u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
    : instance{instance}, scheduler{scheduler}, staging{instance, size, vk::BufferUsageFlagBits::eTransferSrc},
      usage{usage}, total_size{size} {

    const vk::BufferCreateInfo buffer_info = {
        .size = total_size,
        .usage = usage | vk::BufferUsageFlagBits::eTransferDst
    };

    const VmaAllocationCreateInfo alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
    };

    // VMA operates on raw C handles.
    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
    VmaAllocationInfo alloc_info;
    VmaAllocator allocator = instance.GetAllocator();

    vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
                    &unsafe_buffer, &allocation, &alloc_info);

    buffer = vk::Buffer{unsafe_buffer};

    // Fix: `views` holds exactly MAX_BUFFER_VIEWS entries, so a caller
    // providing MAX_BUFFER_VIEWS formats is valid; the previous strict `<`
    // rejected it.
    ASSERT(view_formats.size() <= MAX_BUFFER_VIEWS);

    vk::Device device = instance.GetDevice();
    for (std::size_t i = 0; i < view_formats.size(); i++) {
        const vk::BufferViewCreateInfo view_info = {
            .buffer = buffer,
            .format = view_formats[i],
            .offset = 0,
            .range = total_size
        };

        views[i] = device.createBufferView(view_info);
    }

    view_count = view_formats.size();
    bucket_size = size / SCHEDULER_COMMAND_COUNT;
}
|
||||
|
||||
// Destroys the GPU buffer, its allocation and any texel buffer views.
// The `buffer` check guards against a moved-from/never-created instance.
StreamBuffer::~StreamBuffer() {
    if (buffer) {
        vk::Device device = instance.GetDevice();
        vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
        for (std::size_t i = 0; i < view_count; i++) {
            device.destroyBufferView(views[i]);
        }
    }
}
|
||||
|
||||
// Maps `size` bytes of staging memory in the current command slot's bucket.
// Returns (host pointer, offset into the GPU buffer, invalidate flag); the
// flag is true the first time the bucket is reused after wrap-around so
// callers can invalidate cached state.
// NOTE(review): `alignment` is accepted but never applied to the offset —
// TODO confirm whether callers rely on aligned offsets.
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
    ASSERT(size <= total_size && alignment <= total_size);

    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
    auto& bucket = buckets[current_bucket];
    // A bucket overflow means a single frame produced more data than its
    // slice of the buffer can hold; there is no fallback path.
    if (bucket.offset + size > bucket_size) {
        UNREACHABLE();
    }

    bool invalidate = false;
    if (bucket.invalid) {
        invalidate = true;
        bucket.invalid = false;
    }

    // Buckets are laid out contiguously: slot i owns [i * bucket_size, (i+1) * bucket_size).
    const u32 buffer_offset = current_bucket * bucket_size + bucket.offset;
    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
    return std::make_tuple(mapped, buffer_offset, invalidate);

}
|
||||
|
||||
// Advances the current bucket's write offset by `size` bytes, marking that
// much of the memory returned by Map() as filled and pending upload.
void StreamBuffer::Commit(u32 size) {
    buckets[scheduler.GetCurrentSlotIndex()].offset += size;
}
|
||||
|
||||
void StreamBuffer::Flush() {
|
||||
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
||||
const u32 flush_size = buckets[current_bucket].offset;
|
||||
ASSERT(flush_size <= bucket_size);
|
||||
|
||||
if (flush_size > 0) {
|
||||
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
|
||||
const u32 flush_start = current_bucket * bucket_size;
|
||||
const vk::BufferCopy copy_region = {
|
||||
.srcOffset = flush_start,
|
||||
.dstOffset = flush_start,
|
||||
.size = flush_size
|
||||
};
|
||||
|
||||
vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
|
||||
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
|
||||
|
||||
// Add pipeline barrier for the flushed region
|
||||
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
|
||||
const vk::BufferMemoryBarrier buffer_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = access_mask,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = buffer,
|
||||
.offset = flush_start,
|
||||
.size = flush_size
|
||||
};
|
||||
|
||||
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
|
||||
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
|
||||
}
|
||||
|
||||
// Reset the offset of the next bucket
|
||||
const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
|
||||
buckets[next_bucket].offset = 0;
|
||||
buckets[next_bucket].invalid = true;
|
||||
}
|
||||
|
||||
// Returns the global offset of the next write position: the base of the
// active command slot's bucket plus the bytes already committed in it.
u32 StreamBuffer::GetBufferOffset() const {
    const u32 slot = scheduler.GetCurrentSlotIndex();
    const u32 bucket_base = slot * bucket_size;
    return bucket_base + buckets[slot].offset;
}
|
||||
|
||||
} // namespace Vulkan
|
91
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
91
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <span>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
|
||||
constexpr u32 MAX_BUFFER_VIEWS = 3;
|
||||
|
||||
// A region of the buffer still in use by the GPU, identified by the fence
// counter that must be reached before it can be reused.
// NOTE(review): no code in this file references LockedRegion — possibly a
// leftover from an earlier locking design; confirm before removing.
struct LockedRegion {
    u32 size = 0;
    u64 fence_counter = 0;
};
|
||||
|
||||
// Persistently-mapped host-visible buffer used as the CPU-side source for
// stream buffer uploads. `mapped` spans the whole allocation.
struct StagingBuffer {
    StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
    ~StagingBuffer();

    const Instance& instance;      // Owning Vulkan instance (non-owning reference)
    vk::Buffer buffer{};           // Vulkan buffer handle
    VmaAllocation allocation{};    // Backing VMA allocation
    std::span<std::byte> mapped{}; // Persistent CPU mapping of the buffer
};
|
||||
|
||||
// Ring-style upload buffer: a device-local buffer mirrored by a staging
// buffer, partitioned into one bucket per scheduler command slot so writes
// for a new frame never stomp data the GPU is still reading.
class StreamBuffer {
public:
    StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
                 u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
    ~StreamBuffer();

    // Maps `size` bytes; returns (host pointer, buffer offset, invalidate flag).
    std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);

    /// Commits size bytes from the currently mapped staging memory
    void Commit(u32 size = 0);

    /// Flushes staging memory to the GPU buffer
    void Flush();

    /// Returns the current buffer offset
    u32 GetBufferOffset() const;

    /// Returns the Vulkan buffer handle
    vk::Buffer GetHandle() const {
        return buffer;
    }

    /// Returns an immutable reference to the requested buffer view
    const vk::BufferView& GetView(u32 index = 0) const {
        ASSERT(index < view_count);
        return views[index];
    }

private:
    /// Invalidates the buffer offsets
    // NOTE(review): declared but no definition is visible in the
    // accompanying .cpp — confirm these are implemented or remove.
    void Invalidate();

    /// Removes the lock on regions whose fence counter has been reached by the GPU
    bool UnlockFreeRegions(u32 target_size);

private:
    // Per-command-slot allocation state.
    struct Bucket {
        bool invalid;      // True when the bucket was reset and needs cache invalidation
        u32 fence_counter; // Fence value guarding reuse of this bucket
        u32 offset;        // Current write offset within the bucket
    };

    const Instance& instance;
    TaskScheduler& scheduler;
    StagingBuffer staging; // Host-visible mirror the CPU writes into

    vk::Buffer buffer{};            // Device-local GPU buffer
    VmaAllocation allocation{};
    vk::BufferUsageFlagBits usage;  // Primary usage, drives barrier flags
    u32 total_size = 0;
    std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{}; // Optional texel views
    std::size_t view_count = 0;

    u32 bucket_size = 0; // total_size / SCHEDULER_COMMAND_COUNT
    std::array<Bucket, SCHEDULER_COMMAND_COUNT> buckets{};
};
|
||||
|
||||
} // namespace Vulkan
|
232
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
232
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
@ -0,0 +1,232 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include <algorithm>
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Queries surface properties up-front (without creating the swapchain yet)
// so the present renderpass can be created with the correct surface format.
Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache)
    : instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} {

    // Set the surface format early for RenderpassCache to create the present renderpass
    Configure(0, 0);
    renderpass_cache.CreatePresentRenderpass(surface_format.format);
}
|
||||
|
||||
// Destroys the swapchain and the per-image views/framebuffers it created.
// (The vk::Image handles themselves are owned by the swapchain.)
Swapchain::~Swapchain() {
    vk::Device device = instance.GetDevice();
    device.destroySwapchainKHR(swapchain);

    for (auto& image : swapchain_images) {
        device.destroyImageView(image.image_view);
        device.destroyFramebuffer(image.framebuffer);
    }
}
|
||||
|
||||
// Creates (or recreates) the swapchain for the given surface size, then
// builds an image view and framebuffer for every swapchain image. Safe to
// call repeatedly: the previous swapchain is passed as oldSwapchain and
// destroyed afterwards.
// NOTE(review): `vsync_enabled` is currently unused — present mode is chosen
// in Configure() regardless; confirm whether vsync selection is still TODO.
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
    is_outdated = false;
    is_suboptimal = false;

    // Fetch information about the provided surface
    Configure(width, height);

    const std::array queue_family_indices = {
        instance.GetGraphicsQueueFamilyIndex(),
        instance.GetPresentQueueFamilyIndex(),
    };

    // Concurrent sharing is only needed when graphics and present live on
    // different queue families.
    const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
    const u32 queue_family_indices_count = exclusive ? 1u : 2u;
    const vk::SharingMode sharing_mode =
        exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
    const vk::SwapchainCreateInfoKHR swapchain_info = {
        .surface = surface,
        .minImageCount = image_count,
        .imageFormat = surface_format.format,
        .imageColorSpace = surface_format.colorSpace,
        .imageExtent = extent,
        .imageArrayLayers = 1,
        .imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
        .imageSharingMode = sharing_mode,
        .queueFamilyIndexCount = queue_family_indices_count,
        .pQueueFamilyIndices = queue_family_indices.data(),
        .preTransform = transform,
        .presentMode = present_mode,
        .clipped = true,
        .oldSwapchain = swapchain
    };

    vk::Device device = instance.GetDevice();
    vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);

    // If an old swapchain exists, destroy it and move the new one to its place.
    if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
        device.destroySwapchainKHR(old_swapchain);
    }

    vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
    auto images = device.getSwapchainImagesKHR(swapchain);

    // Destroy the previous images
    for (auto& image : swapchain_images) {
        device.destroyImageView(image.image_view);
        device.destroyFramebuffer(image.framebuffer);
    }

    swapchain_images.clear();
    swapchain_images.resize(images.size());

    // Build a color view + framebuffer per swapchain image for the present pass.
    std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image {
        const vk::ImageViewCreateInfo view_info = {
            .image = image,
            .viewType = vk::ImageViewType::e2D,
            .format = surface_format.format,
            .subresourceRange = {
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            }
        };

        vk::ImageView image_view = device.createImageView(view_info);
        const std::array attachments{image_view};

        const vk::FramebufferCreateInfo framebuffer_info = {
            .renderPass = present_renderpass,
            .attachmentCount = 1,
            .pAttachments = attachments.data(),
            .width = extent.width,
            .height = extent.height,
            .layers = 1
        };

        vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info);

        return Image{
            .image = image,
            .image_view = image_view,
            .framebuffer = framebuffer
        };
    });
}
|
||||
|
||||
// Wait for maximum of 1 second
|
||||
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
|
||||
|
||||
void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
|
||||
VK_NULL_HANDLE, ¤t_image);
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
case vk::Result::eSuboptimalKHR:
|
||||
is_suboptimal = true;
|
||||
break;
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
is_outdated = true;
|
||||
break;
|
||||
default:
|
||||
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Presents the currently acquired image after `wait_for_present` is
 * signalled, then advances the frame index. Out-of-date swapchains are
 * flagged for recreation rather than treated as fatal.
 */
void Swapchain::Present(vk::Semaphore wait_for_present) {
    // Fix: `.pImageIndices` previously pointed at the mis-encoded token
    // `¤t_image` (mojibake for `&current_image`), which does not compile.
    const vk::PresentInfoKHR present_info = {
        .waitSemaphoreCount = 1,
        .pWaitSemaphores = &wait_for_present,
        .swapchainCount = 1,
        .pSwapchains = &swapchain,
        .pImageIndices = &current_image
    };

    vk::Queue present_queue = instance.GetPresentQueue();
    vk::Result result = present_queue.presentKHR(present_info);

    switch (result) {
    case vk::Result::eSuccess:
        break;
    case vk::Result::eSuboptimalKHR:
        LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
        break;
    case vk::Result::eErrorOutOfDateKHR:
        is_outdated = true;
        break;
    default:
        LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
        break;
    }

    current_frame = (current_frame + 1) % swapchain_images.size();
}
|
||||
|
||||
// Queries the physical device for surface format, present mode, extent,
// image count and transform, storing the results in the corresponding
// members. Called before every (re)creation of the swapchain.
void Swapchain::Configure(u32 width, u32 height) {
    vk::PhysicalDevice physical = instance.GetPhysicalDevice();

    // Choose surface format
    auto formats = physical.getSurfaceFormatsKHR(surface);
    surface_format = formats[0];

    // A single eUndefined entry means the surface imposes no format.
    if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
        surface_format.format = vk::Format::eB8G8R8A8Unorm;
    } else {
        auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool {
            return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
                   format.format == vk::Format::eB8G8R8A8Unorm;
        });

        // Fall back to formats[0] (set above) if B8G8R8A8 sRGB is unavailable.
        if (it == formats.end()) {
            LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
        } else {
            surface_format = *it;
        }
    }

    // Checks if a particular mode is supported, if it is, returns that mode.
    auto modes = physical.getSurfacePresentModesKHR(surface);

    // FIFO is guaranteed by the Vulkan standard to be available
    present_mode = vk::PresentModeKHR::eFifo;
    auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) {
        return vk::PresentModeKHR::eMailbox == mode;
    });

    // Prefer Mailbox if present for lowest latency
    if (iter != modes.end()) {
        present_mode = vk::PresentModeKHR::eMailbox;
    }

    // Query surface extent
    auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
    extent = capabilities.currentExtent;

    // A currentExtent of u32 max means the surface size is determined by
    // the swapchain; clamp the requested size to the allowed range.
    if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
        extent.width = std::clamp(width, capabilities.minImageExtent.width,
                                  capabilities.maxImageExtent.width);
        extent.height = std::clamp(height, capabilities.minImageExtent.height,
                                   capabilities.maxImageExtent.height);
    }

    // Select number of images in swap chain, we prefer one buffer in the background to work on
    image_count = capabilities.minImageCount + 1;
    if (capabilities.maxImageCount > 0) {
        image_count = std::min(image_count, capabilities.maxImageCount);
    }

    // Prefer identity transform if possible
    transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
    if (!(capabilities.supportedTransforms & transform)) {
        transform = capabilities.currentTransform;
    }
}
|
||||
|
||||
} // namespace Vulkan
|
91
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
91
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
|
||||
// Owns the Vulkan swapchain plus per-image views/framebuffers and tracks
// when it must be recreated (resize, out-of-date, suboptimal).
class Swapchain {
public:
    Swapchain(const Instance& instance, RenderpassCache& renderpass_cache);
    ~Swapchain();

    /// Creates (or recreates) the swapchain with a given size.
    void Create(u32 width, u32 height, bool vsync_enabled);

    /// Acquires the next image in the swapchain.
    void AcquireNextImage(vk::Semaphore signal_acquired);

    /// Presents the current image and move to the next one
    void Present(vk::Semaphore wait_for_present);

    /// Returns current swapchain state
    vk::Extent2D GetExtent() const {
        return extent;
    }

    /// Returns the swapchain surface
    vk::SurfaceKHR GetSurface() const {
        return surface;
    }

    /// Returns the current framebuffer
    vk::Framebuffer GetFramebuffer() const {
        return swapchain_images[current_image].framebuffer;
    }

    /// Returns the swapchain format
    vk::SurfaceFormatKHR GetSurfaceFormat() const {
        return surface_format;
    }

    /// Returns the Vulkan swapchain handle
    vk::SwapchainKHR GetHandle() const {
        return swapchain;
    }

    /// Returns true when the swapchain should be recreated
    bool NeedsRecreation() const {
        return is_suboptimal || is_outdated;
    }

private:
    // Queries surface format/present mode/extent/transform from the device.
    void Configure(u32 width, u32 height);

private:
    const Instance& instance;
    RenderpassCache& renderpass_cache;
    vk::SwapchainKHR swapchain{};
    vk::SurfaceKHR surface{};

    // Swapchain properties
    vk::SurfaceFormatKHR surface_format;
    vk::PresentModeKHR present_mode;
    vk::Extent2D extent;
    vk::SurfaceTransformFlagBitsKHR transform;
    u32 image_count;

    // One entry per swapchain image.
    struct Image {
        vk::Image image;
        vk::ImageView image_view;
        vk::Framebuffer framebuffer;
    };

    // Swapchain state
    std::vector<Image> swapchain_images;
    u32 current_image = 0; // Index acquired by AcquireNextImage
    u32 current_frame = 0; // Monotonic frame index modulo image count
    bool vsync_enabled = false;
    bool is_outdated = true;   // Start true so the first frame creates the swapchain
    bool is_suboptimal = true;
};
|
||||
|
||||
} // namespace Vulkan
|
252
src/video_core/renderer_vulkan/vk_task_scheduler.cpp
Normal file
252
src/video_core/renderer_vulkan/vk_task_scheduler.cpp
Normal file
@ -0,0 +1,252 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Creates the command pool, per-slot command buffers/semaphores/fences and
// descriptor pools, then begins recording into the first slot. Uses a
// timeline semaphore for GPU progress tracking when available, falling back
// to per-slot fences otherwise.
TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
    vk::Device device = instance.GetDevice();
    const vk::CommandPoolCreateInfo command_pool_info = {
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
    };

    command_pool = device.createCommandPool(command_pool_info);

    // If supported, prefer timeline semaphores over binary ones
    if (instance.IsTimelineSemaphoreSupported()) {
        const vk::StructureChain timeline_info = {
            vk::SemaphoreCreateInfo{},
            vk::SemaphoreTypeCreateInfo{
                .semaphoreType = vk::SemaphoreType::eTimeline,
                .initialValue = 0
            }
        };

        timeline = device.createSemaphore(timeline_info.get());
    }

    // Sizes chosen to comfortably cover one slot's worth of draws; each slot
    // gets its own pool which is reset wholesale in Synchronize().
    constexpr std::array pool_sizes = {
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024},
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024},
        vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
        vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, 512},
        vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048},
        vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024}
    };

    const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
        .maxSets = 2048,
        .poolSizeCount = static_cast<u32>(pool_sizes.size()),
        .pPoolSizes = pool_sizes.data()
    };

    // Two command buffers per slot: one for early uploads, one for rendering.
    const vk::CommandBufferAllocateInfo buffer_info = {
        .commandPool = command_pool,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
    };

    const auto command_buffers = device.allocateCommandBuffers(buffer_info);
    for (std::size_t i = 0; i < commands.size(); i++) {
        commands[i] = ExecutionSlot{
            .image_acquired = device.createSemaphore({}),
            .present_ready = device.createSemaphore({}),
            .fence = device.createFence({}),
            .descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
            .render_command_buffer = command_buffers[2 * i],
            .upload_command_buffer = command_buffers[2 * i + 1],
        };
    }

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };

    // Begin first command
    auto& command = commands[current_command];
    command.render_command_buffer.begin(begin_info);
    command.fence_counter = next_fence_counter++;
}
|
||||
|
||||
// Waits for the GPU to go idle, then destroys all per-slot synchronization
// objects, descriptor pools and finally the command pool (which frees the
// command buffers allocated from it).
TaskScheduler::~TaskScheduler() {
    vk::Device device = instance.GetDevice();
    device.waitIdle();

    if (timeline) {
        device.destroySemaphore(timeline);
    }

    for (const auto& command : commands) {
        device.destroyFence(command.fence);
        device.destroySemaphore(command.image_acquired);
        device.destroySemaphore(command.present_ready);
        device.destroyDescriptorPool(command.descriptor_pool);
    }

    device.destroyCommandPool(command_pool);
}
|
||||
|
||||
/**
 * Blocks the host until the GPU has finished executing the commands in the
 * given slot, then resets the slot's fence and descriptor pool for reuse.
 * Uses the timeline semaphore's counter when available, the slot fence
 * otherwise.
 */
void TaskScheduler::Synchronize(u32 slot) {
    const auto& command = commands[slot];
    vk::Device device = instance.GetDevice();

    // Fix: fence counters are 64-bit; the previous u32 local truncated both
    // the cached counter and the timeline semaphore value, which would make
    // in-flight work appear complete once the counter exceeds 2^32.
    u64 completed_counter = completed_fence_counter;
    if (instance.IsTimelineSemaphoreSupported()) {
        completed_counter = device.getSemaphoreCounterValue(timeline);
    }

    if (command.fence_counter > completed_counter) {
        if (instance.IsTimelineSemaphoreSupported()) {
            const vk::SemaphoreWaitInfo wait_info = {
                .semaphoreCount = 1,
                .pSemaphores = &timeline,
                .pValues = &command.fence_counter
            };

            if (device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) {
                LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter);
                UNREACHABLE();
            }

        } else if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
            LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter);
            UNREACHABLE();
        }
    }

    completed_fence_counter = command.fence_counter;
    device.resetFences(command.fence);
    device.resetDescriptorPool(command.descriptor_pool);
}
|
||||
|
||||
/**
 * Ends recording on the current slot and submits its command buffers (upload
 * first, then render) to the graphics queue. With timeline semaphores the
 * submission waits on the previous slot's counter; with SwapchainSynced mode
 * it additionally waits on image acquisition and signals present readiness.
 * Flush mode blocks until the GPU catches up; Shutdown skips starting the
 * next slot.
 */
void TaskScheduler::Submit(SubmitMode mode) {
    const auto& command = commands[current_command];
    command.render_command_buffer.end();
    if (command.use_upload_buffer) {
        command.upload_command_buffer.end();
    }

    u32 command_buffer_count = 0;
    std::array<vk::CommandBuffer, 2> command_buffers;

    if (command.use_upload_buffer) {
        command_buffers[command_buffer_count++] = command.upload_command_buffer;
    }

    command_buffers[command_buffer_count++] = command.render_command_buffer;

    const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
    if (instance.IsTimelineSemaphoreSupported()) {
        const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
        // Fix: the previous `std::array wait_values{command.fence_counter - 1, 1ul}`
        // relied on CTAD with mixed element types (u64 and unsigned long),
        // which fails to deduce on LLP64 platforms (e.g. Windows). Spell the
        // element type out explicitly. Binary semaphore slots use dummy values.
        const std::array<u64, 2> wait_values{command.fence_counter - 1, 1};
        const std::array wait_semaphores{timeline, command.image_acquired};

        const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
        const std::array<u64, 2> signal_values{command.fence_counter, 0};
        const std::array signal_semaphores{timeline, command.present_ready};

        const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
            .waitSemaphoreValueCount = wait_semaphore_count,
            .pWaitSemaphoreValues = wait_values.data(),
            .signalSemaphoreValueCount = signal_semaphore_count,
            .pSignalSemaphoreValues = signal_values.data()
        };

        const std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
            vk::PipelineStageFlagBits::eAllCommands,
            vk::PipelineStageFlagBits::eColorAttachmentOutput,
        };

        const vk::SubmitInfo submit_info = {
            .pNext = &timeline_si,
            .waitSemaphoreCount = wait_semaphore_count,
            .pWaitSemaphores = wait_semaphores.data(),
            .pWaitDstStageMask = wait_stage_masks.data(),
            .commandBufferCount = command_buffer_count,
            .pCommandBuffers = command_buffers.data(),
            .signalSemaphoreCount = signal_semaphore_count,
            .pSignalSemaphores = signal_semaphores.data(),
        };

        vk::Queue queue = instance.GetGraphicsQueue();
        queue.submit(submit_info);

    } else {
        // Binary semaphore path: only wait/signal swapchain semaphores when
        // this submission is tied to presentation.
        const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
        const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
        const vk::PipelineStageFlags wait_stage_masks =
            vk::PipelineStageFlagBits::eColorAttachmentOutput;

        const vk::SubmitInfo submit_info = {
            .waitSemaphoreCount = wait_semaphore_count,
            .pWaitSemaphores = &command.image_acquired,
            .pWaitDstStageMask = &wait_stage_masks,
            .commandBufferCount = command_buffer_count,
            .pCommandBuffers = command_buffers.data(),
            .signalSemaphoreCount = signal_semaphore_count,
            .pSignalSemaphores = &command.present_ready,
        };

        vk::Queue queue = instance.GetGraphicsQueue();
        queue.submit(submit_info, command.fence);
    }

    // Block host until the GPU catches up
    if (True(mode & SubmitMode::Flush)) {
        Synchronize(current_command);
    }

    // Switch to next cmdbuffer.
    if (False(mode & SubmitMode::Shutdown)) {
        SwitchSlot();
    }
}
|
||||
|
||||
// Returns the fence counter of the most recently completed command slot.
// With timeline semaphores this queries the GPU-side counter directly;
// otherwise it returns the value cached by the last Synchronize() call.
u64 TaskScheduler::GetFenceCounter() const {
    vk::Device device = instance.GetDevice();
    if (instance.IsTimelineSemaphoreSupported()) {
        return device.getSemaphoreCounterValue(timeline);
    }

    return completed_fence_counter;
}
|
||||
|
||||
// Returns the current slot's upload command buffer, lazily beginning it on
// first use in the slot. The use_upload_buffer flag tells Submit() to end
// and submit it ahead of the render command buffer.
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
    auto& command = commands[current_command];
    if (!command.use_upload_buffer) {
        const vk::CommandBufferBeginInfo begin_info = {
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
        };

        command.upload_command_buffer.begin(begin_info);
        command.use_upload_buffer = true;
    }

    return command.upload_command_buffer;
}
|
||||
|
||||
// Advances to the next command slot (round-robin), waits until the GPU is
// done with that slot's previous submission, and begins recording into it.
void TaskScheduler::SwitchSlot() {
    current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
    auto& command = commands[current_command];

    // Wait for the GPU to finish with all resources for this command.
    Synchronize(current_command);

    const vk::CommandBufferBeginInfo begin_info = {
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };

    // Begin the next command buffer.
    command.render_command_buffer.begin(begin_info);
    command.fence_counter = next_fence_counter++;
    command.use_upload_buffer = false;
}
|
||||
|
||||
} // namespace Vulkan
|
97
src/video_core/renderer_vulkan/vk_task_scheduler.h
Normal file
97
src/video_core/renderer_vulkan/vk_task_scheduler.h
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include "common/common_types.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Buffer;
|
||||
class Instance;
|
||||
|
||||
/// Bitmask flags controlling TaskScheduler::Submit behavior; combinable via
/// the operators declared by DECLARE_ENUM_FLAG_OPERATORS below.
enum class SubmitMode : u8 {
    SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
    Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads
    Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer
};

DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
|
||||
|
||||
/**
 * Manages a small ring of command-buffer "execution slots", each with its own
 * fence, semaphores and descriptor pool. Submission ordering is tracked with a
 * monotonically increasing fence counter (backed by a timeline semaphore when
 * the device supports it — see GetFenceCounter's implementation).
 */
class TaskScheduler {
public:
    TaskScheduler(const Instance& instance);
    ~TaskScheduler();

    /// Blocks the host until the current command completes execution
    void Synchronize(u32 slot);

    /// Submits the current command to the graphics queue
    void Submit(SubmitMode mode);

    /// Returns the last completed fence counter
    u64 GetFenceCounter() const;

    /// Returns the command buffer used for early upload operations.
    vk::CommandBuffer GetUploadCommandBuffer();

    /// Returns the command buffer used for rendering
    vk::CommandBuffer GetRenderCommandBuffer() const {
        return commands[current_command].render_command_buffer;
    }

    /// Returns the current descriptor pool
    vk::DescriptorPool GetDescriptorPool() const {
        return commands[current_command].descriptor_pool;
    }

    /// Returns the index of the current command slot
    u32 GetCurrentSlotIndex() const {
        return current_command;
    }

    /// Returns the fence counter assigned to the most recently begun command
    /// (next_fence_counter is pre-incremented when a slot is opened).
    u64 GetHostFenceCounter() const {
        return next_fence_counter - 1;
    }

    /// Semaphore signaled when the swapchain image for this slot is acquired.
    vk::Semaphore GetImageAcquiredSemaphore() const {
        return commands[current_command].image_acquired;
    }

    /// Semaphore signaled when this slot's commands are ready for presentation.
    vk::Semaphore GetPresentReadySemaphore() const {
        return commands[current_command].present_ready;
    }

private:
    /// Activates the next command slot and optionally waits for its completion
    void SwitchSlot();

private:
    const Instance& instance;
    u64 next_fence_counter = 1;      // counter assigned to the next submission
    u64 completed_fence_counter = 0; // highest counter known finished (fence path)

    /// Per-slot GPU resources; one slot is recorded while others may be in flight.
    struct ExecutionSlot {
        bool use_upload_buffer = false; // upload buffer begun for this cycle?
        u64 fence_counter = 0;          // counter value this slot will signal
        vk::Semaphore image_acquired;
        vk::Semaphore present_ready;
        vk::Fence fence;
        vk::DescriptorPool descriptor_pool;
        vk::CommandBuffer render_command_buffer;
        vk::CommandBuffer upload_command_buffer;
    };

    vk::CommandPool command_pool{};
    vk::Semaphore timeline{}; // timeline semaphore; unused if unsupported
    std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
    u32 current_command = 0;  // index of the slot currently being recorded
};
|
||||
|
||||
} // namespace Vulkan
|
705
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
705
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
@ -0,0 +1,705 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Maps a guest PICA pixel format to the corresponding Vulkan format.
/// Unknown (non-Invalid) formats intentionally fall through to RGBA8.
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
    switch (format) {
    case VideoCore::PixelFormat::RGBA8:
        return vk::Format::eR8G8B8A8Unorm;
    case VideoCore::PixelFormat::RGB8:
        return vk::Format::eR8G8B8Unorm;
    case VideoCore::PixelFormat::RGB5A1:
        return vk::Format::eR5G5B5A1UnormPack16;
    case VideoCore::PixelFormat::RGB565:
        return vk::Format::eR5G6B5UnormPack16;
    case VideoCore::PixelFormat::RGBA4:
        return vk::Format::eR4G4B4A4UnormPack16;
    case VideoCore::PixelFormat::D16:
        return vk::Format::eD16Unorm;
    case VideoCore::PixelFormat::D24:
        return vk::Format::eX8D24UnormPack32;
    case VideoCore::PixelFormat::D24S8:
        return vk::Format::eD24UnormS8Uint;
    case VideoCore::PixelFormat::Invalid:
        LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
        return vk::Format::eUndefined;
    default:
        // Use default case for the texture formats
        return vk::Format::eR8G8B8A8Unorm;
    }
}
|
||||
|
||||
/// Returns the Vulkan image aspect flags for a surface type.
/// Color/texture/fill surfaces are color-aspect; depth-stencil carries both
/// eDepth and eStencil. The trailing return is unreachable fallback.
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
    switch (type) {
    case VideoCore::SurfaceType::Color:
    case VideoCore::SurfaceType::Texture:
    case VideoCore::SurfaceType::Fill:
        return vk::ImageAspectFlagBits::eColor;
    case VideoCore::SurfaceType::Depth:
        return vk::ImageAspectFlagBits::eDepth;
    case VideoCore::SurfaceType::DepthStencil:
        return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
    default:
        UNREACHABLE_MSG("Invalid surface type!");
    }

    return vk::ImageAspectFlagBits::eColor;
}
|
||||
|
||||
/// Returns the format-feature bit a surface type requires from its Vulkan
/// format, used by FormatConvert to test host support for a format.
vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) {
    switch (type) {
    case VideoCore::SurfaceType::Color:
    case VideoCore::SurfaceType::Texture:
    case VideoCore::SurfaceType::Fill:
        return vk::FormatFeatureFlagBits::eColorAttachment;
    case VideoCore::SurfaceType::Depth:
    case VideoCore::SurfaceType::DepthStencil:
        return vk::FormatFeatureFlagBits::eDepthStencilAttachment;
    default:
        UNREACHABLE_MSG("Invalid surface type!");
    }

    // Unreachable fallback for compilers that require a return here.
    return vk::FormatFeatureFlagBits::eColorAttachment;
}
|
||||
|
||||
// Size of each per-slot staging buffer used for texture uploads/downloads.
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;

/// Creates one host-visible staging buffer per scheduler slot; each buffer is
/// usable as both a transfer source (uploads) and destination (downloads).
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
                               RenderpassCache& renderpass_cache)
    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {

    for (auto& buffer : staging_buffers) {
        buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
                                                 vk::BufferUsageFlagBits::eTransferSrc |
                                                 vk::BufferUsageFlagBits::eTransferDst);
    }
}
|
||||
|
||||
/// Waits for the device to go idle, then destroys every recycled image
/// (VMA image + allocation + its view) and every cached clear framebuffer.
TextureRuntime::~TextureRuntime() {
    VmaAllocator allocator = instance.GetAllocator();
    vk::Device device = instance.GetDevice();
    // Ensure no in-flight command buffer still references these resources.
    device.waitIdle();

    for (const auto& [key, alloc] : texture_recycler) {
        vmaDestroyImage(allocator, alloc.image, alloc.allocation);
        device.destroyImageView(alloc.image_view);
    }

    for (const auto& [key, framebuffer] : clear_framebuffers) {
        device.destroyFramebuffer(framebuffer);
    }

    texture_recycler.clear();
}
|
||||
|
||||
/// Sub-allocates `size` bytes from the current slot's staging buffer, returning
/// the buffer handle, a mapped span over the region, and its byte offset.
/// Note: the offset is not advanced here — callers (Surface::Upload/Download)
/// bump staging_offsets after committing their copy. The `upload` parameter is
/// currently unused.
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
    const u32 current_slot = scheduler.GetCurrentSlotIndex();
    const u32 offset = staging_offsets[current_slot];
    // Hard failure if a single slot's transfers exceed the fixed buffer size.
    if (offset + size > STAGING_BUFFER_SIZE) {
        LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
        UNREACHABLE();
    }

    const auto& buffer = staging_buffers[current_slot];
    return StagingData{
        .buffer = buffer->buffer,
        .size = size,
        .mapped = buffer->mapped.subspan(offset, size),
        .buffer_offset = offset
    };
}
|
||||
|
||||
/// Resets the staging sub-allocation offset for a slot that has just been
/// recycled by the scheduler; safe because Synchronize() has already ensured
/// the GPU finished consuming that slot's staging data.
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
    staging_offsets[new_slot] = 0;
}
|
||||
|
||||
/**
 * Allocates (or recycles) a 2D/cube image of the given size and guest format.
 *
 * The returned allocation carries a full mip chain (bit_width(max(w,h)) levels)
 * and an image view covering every level and layer. Recycled allocations are
 * keyed by (format, width, height, layers) and reused verbatim.
 *
 * @param width/height Dimensions of the base mip level (already scaled).
 * @param format Guest pixel format; mapped through GetFormatAlternative.
 * @param type 2D texture or cube map (6 array layers).
 */
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                                    VideoCore::TextureType type) {

    const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
    const VideoCore::HostTextureTag key = {
        .format = format,
        .width = width,
        .height = height,
        .layers = layers
    };

    // Attempt to recycle an unused allocation
    if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
        ImageAlloc alloc = std::move(it->second);
        texture_recycler.erase(it);
        return alloc;
    }

    // Create a new allocation, substituting an equivalent format the hardware
    // supports if the canonical one is unavailable.
    vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
    vk::ImageAspectFlags aspect = GetImageAspect(vk_format);

    // Full mip chain: enough levels to reach 1x1 from the larger dimension.
    const u32 levels = std::bit_width(std::max(width, height));
    const vk::ImageCreateInfo image_info = {
        .flags = type == VideoCore::TextureType::CubeMap ?
                 vk::ImageCreateFlagBits::eCubeCompatible :
                 vk::ImageCreateFlags{},
        .imageType = vk::ImageType::e2D,
        .format = vk_format,
        .extent = {width, height, 1},
        .mipLevels = levels,
        .arrayLayers = layers,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = GetImageUsage(aspect),
    };

    const VmaAllocationCreateInfo alloc_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
    };

    VkImage unsafe_image{};
    VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
    VmaAllocation allocation;

    VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
                                     &unsafe_image, &allocation, nullptr);
    if (result != VK_SUCCESS) {
        LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
        UNREACHABLE();
    }

    vk::Image image = vk::Image{unsafe_image};

    const vk::ImageViewCreateInfo view_info = {
        .image = image,
        .viewType = type == VideoCore::TextureType::CubeMap ?
                    vk::ImageViewType::eCube :
                    vk::ImageViewType::e2D,
        .format = vk_format,
        .subresourceRange = {
            .aspectMask = aspect,
            .baseMipLevel = 0,
            // The view must span the entire mip chain: the image is created
            // with `levels` mips and Transition/GenerateMipmaps operate on all
            // of them. (Previously 1, which hid every level above the base.)
            .levelCount = levels,
            .baseArrayLayer = 0,
            .layerCount = layers
        }
    };

    vk::Device device = instance.GetDevice();
    vk::ImageView image_view = device.createImageView(view_info);

    return ImageAlloc{
        .image = image,
        .image_view = image_view,
        .allocation = allocation,
        .aspect = aspect,
        .levels = levels,
    };
}
|
||||
|
||||
/// Returns an image allocation to the recycler keyed by its texture tag so a
/// future Allocate() with identical parameters can reuse it without a new
/// VMA allocation.
void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) {
    texture_recycler.emplace(tag, std::move(alloc));
}
|
||||
|
||||
/**
 * Converts pixel data between the guest layout and a host-compatible layout.
 *
 * RGBA8 always goes through ABGR->RGBA byte swizzling; RGB8 is expanded to
 * RGBA on upload only. Any other format supported natively by the device is
 * memcpy'd through unchanged. Unsupported formats are also memcpy'd verbatim
 * as a best-effort fallback, with a critical log.
 *
 * @param format Guest pixel format of the data.
 * @param upload True when converting guest->host (upload direction).
 * @param source Input pixel bytes; @param dest output buffer (caller-sized).
 */
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
                                   std::span<std::byte> source, std::span<std::byte> dest) {
    const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
    const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type);

    if (format == VideoCore::PixelFormat::RGBA8) {
        return Pica::Texture::ConvertABGRToRGBA(source, dest);
    } else if (format == VideoCore::PixelFormat::RGB8 && upload) {
        return Pica::Texture::ConvertBGRToRGBA(source, dest);
    } else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
        std::memcpy(dest.data(), source.data(), source.size());
    } else {
        // Typo fixed in the log message ("converion" -> "conversion").
        LOG_CRITICAL(Render_Vulkan, "Unimplemented conversion for format {}!", format);
        std::memcpy(dest.data(), source.data(), source.size());
    }
}
|
||||
|
||||
/**
 * Clears a rectangle of a surface at a given mip level.
 *
 * Full-surface clears use vkCmdClearColorImage/vkCmdClearDepthStencilImage on
 * the transfer-dst layout; partial clears go through a load-op-clear renderpass
 * restricted to the requested render area, with a per-image-view framebuffer
 * cached in clear_framebuffers. Always returns true.
 */
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                                  VideoCore::ClearValue value) {
    const vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
    // Clears cannot be recorded inside an active renderpass.
    renderpass_cache.ExitRenderpass();

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal,
               0, surface.alloc.levels, 0, surface.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);

    // Build the clear value matching the surface's aspect.
    vk::ClearValue clear_value{};
    if (aspect & vk::ImageAspectFlagBits::eColor) {
        clear_value.color = vk::ClearColorValue{
            .float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})
        };
    } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
        clear_value.depthStencil = vk::ClearDepthStencilValue{
            .depth = value.depth,
            .stencil = value.stencil
        };
    }

    // For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
    if (clear.texture_rect == surface.GetScaledRect()) {
        const vk::ImageSubresourceRange range = {
            .aspectMask = aspect,
            .baseMipLevel = clear.texture_level,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1
        };

        if (aspect & vk::ImageAspectFlagBits::eColor) {
            command_buffer.clearColorImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
                                           clear_value.color, range);
        } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
            command_buffer.clearDepthStencilImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
                                                  clear_value.depthStencil, range);
        }
    } else {
        // For partial clears we begin a clear renderpass with the appropriate render area
        vk::RenderPass clear_renderpass{};
        ImageAlloc& alloc = surface.alloc;
        if (aspect & vk::ImageAspectFlagBits::eColor) {
            clear_renderpass = renderpass_cache.GetRenderpass(surface.pixel_format,
                                                              VideoCore::PixelFormat::Invalid, true);
            Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0, alloc.levels);
        } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
            clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
                                                              surface.pixel_format, true);
            Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, alloc.levels);
        }

        // Cache one framebuffer per image view; sized to the full scaled
        // surface (the render area below restricts the cleared region).
        auto [it, new_framebuffer] = clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{});
        if (new_framebuffer) {
            const vk::FramebufferCreateInfo framebuffer_info = {
                .renderPass = clear_renderpass,
                .attachmentCount = 1,
                .pAttachments = &alloc.image_view,
                .width = surface.GetScaledWidth(),
                .height = surface.GetScaledHeight(),
                .layers = 1
            };

            vk::Device device = instance.GetDevice();
            it->second = device.createFramebuffer(framebuffer_info);
        }

        const vk::RenderPassBeginInfo clear_begin_info = {
            .renderPass = clear_renderpass,
            .framebuffer = it->second,
            .renderArea = vk::Rect2D{
                .offset = {static_cast<s32>(clear.texture_rect.left), static_cast<s32>(clear.texture_rect.bottom)},
                .extent = {clear.texture_rect.GetWidth(), clear.texture_rect.GetHeight()}
            },
            .clearValueCount = 1,
            .pClearValues = &clear_value
        };

        // Enter and immediately exit: the load-op clear does the work.
        renderpass_cache.EnterRenderpass(clear_begin_info);
        renderpass_cache.ExitRenderpass();
    }

    return true;
}
|
||||
|
||||
/**
 * Records a 1:1 image copy between two surfaces (no scaling/format change).
 * Both images are transitioned to the appropriate transfer layouts first.
 * Copies a single mip level and array layer per call. Always returns true.
 */
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
    // Transfer commands must be recorded outside a renderpass.
    renderpass_cache.ExitRenderpass();

    const vk::ImageCopy image_copy = {
        .srcSubresource = {
            .aspectMask = ToVkAspect(source.type),
            .mipLevel = copy.src_level,
            .baseArrayLayer = 0,
            .layerCount = 1
        },
        .srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
        .dstSubresource = {
            .aspectMask = ToVkAspect(dest.type),
            .mipLevel = copy.dst_level,
            .baseArrayLayer = 0,
            .layerCount = 1
        },
        .dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
        .extent = {copy.extent.width, copy.extent.height, 1}
    };

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, source.alloc.levels);
    Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0, dest.alloc.levels);

    command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                             dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);

    return true;
}
|
||||
|
||||
/**
 * Records a scaled blit (linear filtering) from one surface rectangle to
 * another, honoring the requested mip level and array layer on each side.
 * Always returns true.
 */
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) {
    renderpass_cache.ExitRenderpass();

    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal,
               0, source.alloc.levels, 0, source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
    Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal,
               0, dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);

    // vkCmdBlitImage takes two corner offsets per side; z spans [0, 1).
    const std::array source_offsets = {
        vk::Offset3D{static_cast<s32>(blit.src_rect.left), static_cast<s32>(blit.src_rect.bottom), 0},
        vk::Offset3D{static_cast<s32>(blit.src_rect.right), static_cast<s32>(blit.src_rect.top), 1}
    };

    const std::array dest_offsets = {
        vk::Offset3D{static_cast<s32>(blit.dst_rect.left), static_cast<s32>(blit.dst_rect.bottom), 0},
        vk::Offset3D{static_cast<s32>(blit.dst_rect.right), static_cast<s32>(blit.dst_rect.top), 1}
    };

    const vk::ImageBlit blit_area = {
        .srcSubresource = {
            .aspectMask = ToVkAspect(source.type),
            .mipLevel = blit.src_level,
            .baseArrayLayer = blit.src_layer,
            .layerCount = 1
        },
        .srcOffsets = source_offsets,
        .dstSubresource = {
            .aspectMask = ToVkAspect(dest.type),
            .mipLevel = blit.dst_level,
            .baseArrayLayer = blit.dst_layer,
            .layerCount = 1
        },
        .dstOffsets = dest_offsets
    };

    command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                             dest.alloc.image, vk::ImageLayout::eTransferDstOptimal,
                             blit_area, vk::Filter::eLinear);

    return true;
}
|
||||
|
||||
/**
 * Generates the full mip chain of a surface by repeatedly blitting each level
 * into the next at half resolution, with linear filtering.
 *
 * NOTE(review): the `max_level` parameter is unused — the level count is
 * recomputed from the unscaled surface dimensions. Confirm whether callers
 * expect generation to stop at max_level.
 * NOTE(review): current_width/current_height are never halved inside the loop,
 * so every blit reads/writes the base-level extent — verify intended.
 */
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
    renderpass_cache.ExitRenderpass();

    // TODO: Investigate AMD single pass downsampler
    s32 current_width = surface.GetScaledWidth();
    s32 current_height = surface.GetScaledHeight();

    const u32 levels = std::bit_width(std::max(surface.width, surface.height));
    vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
    for (u32 i = 1; i < levels; i++) {
        // Previous level becomes the blit source, current level the destination.
        Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
        Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1);

        const std::array source_offsets = {
            vk::Offset3D{0, 0, 0},
            vk::Offset3D{current_width, current_height, 1}
        };

        // Destination extent is half the source, clamped to a minimum of 1.
        const std::array dest_offsets = {
            vk::Offset3D{0, 0, 0},
            vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
                         current_height > 1 ? current_height / 2 : 1, 1}
        };

        const vk::ImageBlit blit_area = {
            .srcSubresource = {
                .aspectMask = aspect,
                .mipLevel = i - 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .srcOffsets = source_offsets,
            .dstSubresource = {
                .aspectMask = aspect,
                .mipLevel = i,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .dstOffsets = dest_offsets
        };

        command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                                 surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
                                 blit_area, vk::Filter::eLinear);
    }
}
|
||||
|
||||
/**
 * Records an image memory barrier transitioning an allocation to `new_layout`,
 * deriving src/dst access masks and pipeline stages from the old/new layouts.
 * No-op when the layout is unchanged or the image handle is null.
 *
 * NOTE(review): the `level`/`level_count` arguments are currently ignored —
 * the barrier always covers mip range [0, alloc.levels) (see the commented-out
 * fields in subresourceRange), and alloc.layout tracks a single layout for the
 * whole image rather than per-level state. Confirm this is intentional.
 */
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
                                vk::ImageLayout new_layout, u32 level, u32 level_count,
                                u32 layer, u32 layer_count) {
    if (new_layout == alloc.layout || !alloc.image) {
        return;
    }

    // Access mask + pipeline stage pair describing one side of the barrier.
    struct LayoutInfo {
        vk::AccessFlags access;
        vk::PipelineStageFlags stage;
    };

    // Get optimal transition settings for every image layout. Settings taken from Dolphin
    auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
        LayoutInfo info;
        switch (layout) {
        case vk::ImageLayout::eUndefined:
            // Layout undefined therefore contents undefined, and we don't care what happens to it.
            info.access = vk::AccessFlagBits::eNone;
            info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
            break;
        case vk::ImageLayout::ePreinitialized:
            // Image has been pre-initialized by the host, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eHostWrite;
            info.stage = vk::PipelineStageFlagBits::eHost;
            break;
        case vk::ImageLayout::eColorAttachmentOptimal:
            // Image was being used as a color attachment, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eColorAttachmentRead |
                          vk::AccessFlagBits::eColorAttachmentWrite;
            info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
            break;
        case vk::ImageLayout::eDepthStencilAttachmentOptimal:
            // Image was being used as a depthstencil attachment, so ensure all writes have completed.
            info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
                          vk::AccessFlagBits::eDepthStencilAttachmentWrite;
            info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
                         vk::PipelineStageFlagBits::eLateFragmentTests;
            break;
        case vk::ImageLayout::ePresentSrcKHR:
            info.access = vk::AccessFlagBits::eNone;
            info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
            break;
        case vk::ImageLayout::eShaderReadOnlyOptimal:
            // Image was being used as a shader resource, make sure all reads have finished.
            info.access = vk::AccessFlagBits::eShaderRead;
            info.stage = vk::PipelineStageFlagBits::eFragmentShader;
            break;
        case vk::ImageLayout::eTransferSrcOptimal:
            // Image was being used as a copy source, ensure all reads have finished.
            info.access = vk::AccessFlagBits::eTransferRead;
            info.stage = vk::PipelineStageFlagBits::eTransfer;
            break;
        case vk::ImageLayout::eTransferDstOptimal:
            // Image was being used as a copy destination, ensure all writes have finished.
            info.access = vk::AccessFlagBits::eTransferWrite;
            info.stage = vk::PipelineStageFlagBits::eTransfer;
            break;
        case vk::ImageLayout::eGeneral:
            info.access = vk::AccessFlagBits::eInputAttachmentRead;
            info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput |
                         vk::PipelineStageFlagBits::eFragmentShader;
            break;
        default:
            LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
            UNREACHABLE();
        }

        return info;
    };

    LayoutInfo source = GetLayoutInfo(alloc.layout);
    LayoutInfo dest = GetLayoutInfo(new_layout);

    const vk::ImageMemoryBarrier barrier = {
        .srcAccessMask = source.access,
        .dstAccessMask = dest.access,
        .oldLayout = alloc.layout,
        .newLayout = new_layout,
        .image = alloc.image,
        .subresourceRange = {
            .aspectMask = alloc.aspect,
            .baseMipLevel = /*level*/0,
            .levelCount = /*level_count*/alloc.levels,
            .baseArrayLayer = layer,
            .layerCount = layer_count
        }
    };

    command_buffer.pipelineBarrier(source.stage, dest.stage,
                                   vk::DependencyFlagBits::eByRegion,
                                   {}, {}, barrier);

    alloc.layout = new_layout;
}
|
||||
|
||||
/// Constructs a surface from cache parameters, allocating backing GPU memory
/// at the scaled resolution unless the pixel format is Invalid (placeholder
/// surfaces keep a default ImageAlloc).
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
    : VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
      scheduler{runtime.GetScheduler()} {

    if (pixel_format != VideoCore::PixelFormat::Invalid) {
        alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
    }
}
|
||||
|
||||
/// Hands the backing allocation back to the runtime's recycler (keyed by
/// format/size/layer-count) instead of destroying it, so a matching future
/// surface can reuse the image. Invalid-format surfaces own no allocation.
Surface::~Surface() {
    if (pixel_format != VideoCore::PixelFormat::Invalid) {
        const VideoCore::HostTextureTag tag = {
            .format = pixel_format,
            .width = GetScaledWidth(),
            .height = GetScaledHeight(),
            .layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u
        };

        runtime.Recycle(tag, std::move(alloc));
    }
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
/**
 * Uploads pixel data from a staging sub-allocation into this surface's image.
 * Scaled surfaces are routed through ScaledUpload (currently a stub); the
 * unscaled path records a buffer->image copy at the requested mip level.
 * Afterwards the staging offset for the current slot is advanced so the region
 * stays reserved until the scheduler recycles the slot.
 */
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Upload);

    // Transfer commands must be recorded outside a renderpass.
    runtime.renderpass_cache.ExitRenderpass();

    const bool is_scaled = res_scale != 1;
    if (is_scaled) {
        ScaledUpload(upload);
    } else {
        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        const VideoCore::Rect2D rect = upload.texture_rect;
        const vk::BufferImageCopy copy_region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = rect.GetWidth(),
            .bufferImageHeight = rect.GetHeight(),
            .imageSubresource = {
                .aspectMask = alloc.aspect,
                .mipLevel = upload.texture_level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
            .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
        };

        runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, alloc.levels,
                           0, texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
        command_buffer.copyBufferToImage(staging.buffer, alloc.image,
                                         vk::ImageLayout::eTransferDstOptimal,
                                         copy_region);
    }

    InvalidateAllWatcher();

    // Lock this data until the next scheduler switch
    const u32 current_slot = scheduler.GetCurrentSlotIndex();
    runtime.staging_offsets[current_slot] += staging.size;
}
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
/**
 * Downloads pixel data from this surface into a staging sub-allocation.
 * Scaled surfaces route through ScaledDownload (currently a stub). The
 * unscaled path records an image->buffer copy and submits with SubmitMode::
 * Flush so the caller can read the staging memory after synchronization.
 * Depth+stencil surfaces currently bail out early (see HACK below).
 */
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
    MICROPROFILE_SCOPE(Vulkan_Download);

    runtime.renderpass_cache.ExitRenderpass();

    const bool is_scaled = res_scale != 1;
    if (is_scaled) {
        ScaledDownload(download);
    } else {
        // Up to two regions: depth and stencil aspects must be copied separately.
        u32 region_count = 0;
        std::array<vk::BufferImageCopy, 2> copy_regions;

        vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
        const VideoCore::Rect2D rect = download.texture_rect;
        vk::BufferImageCopy copy_region = {
            .bufferOffset = staging.buffer_offset,
            .bufferRowLength = rect.GetWidth(),
            .bufferImageHeight = rect.GetHeight(),
            .imageSubresource = {
                .aspectMask = alloc.aspect,
                .mipLevel = download.texture_level,
                .baseArrayLayer = 0,
                .layerCount = 1
            },
            .imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
            .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
        };

        if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
            copy_regions[region_count++] = copy_region;
        } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
            copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
            copy_regions[region_count++] = copy_region;

            if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
                return; // HACK: Skip depth + stencil downloads for now
                // NOTE(review): everything below this return in the stencil
                // branch is dead code kept for when the HACK is removed.
                copy_region.bufferOffset += staging.mapped.size();
                copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
                copy_regions[region_count++] = copy_region;
            }
        }

        runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);

        // Copy pixel data to the staging buffer
        command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                                         staging.buffer, region_count, copy_regions.data());

        // Flush so the host can wait for the copy and read the staging memory.
        scheduler.Submit(SubmitMode::Flush);
    }

    // Lock this data until the next scheduler switch
    const u32 current_slot = scheduler.GetCurrentSlotIndex();
    runtime.staging_offsets[current_slot] += staging.size;
}
|
||||
|
||||
/// Downloads from a resolution-scaled surface by first blitting to an unscaled
/// temporary. NOT YET IMPLEMENTED for Vulkan — the body is the OpenGL backend's
/// implementation kept commented out as a porting reference; calling this is
/// currently a silent no-op.
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
    /*const u32 rect_width = download.texture_rect.GetWidth();
    const u32 rect_height = download.texture_rect.GetHeight();

    // Allocate an unscaled texture that fits the download rectangle to use as a blit destination
    const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
                                                     VideoCore::TextureType::Texture2D);
    runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
    runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);

    // Blit the scaled rectangle to the unscaled texture
    const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
    glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
                      0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);

    const auto& tuple = runtime.GetFormatTuple(pixel_format);
    if (driver.IsOpenGLES()) {
        const auto& downloader_es = runtime.GetDownloaderES();
        downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
                                  rect_height, rect_width,
                                  reinterpret_cast<void*>(download.buffer_offset));
    } else {
        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
                      reinterpret_cast<void*>(download.buffer_offset));
    }*/
}
|
||||
|
||||
/// Uploads into a resolution-scaled surface via an unscaled temporary plus a
/// filtered blit. NOT YET IMPLEMENTED for Vulkan — the body is the OpenGL
/// backend's implementation kept commented out as a porting reference;
/// calling this is currently a silent no-op.
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
    /*const u32 rect_width = upload.texture_rect.GetWidth();
    const u32 rect_height = upload.texture_rect.GetHeight();

    OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
                                               VideoCore::TextureType::Texture2D);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);

    glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
                    tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));

    const auto scaled_rect = upload.texture_rect * res_scale;
    const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
    const auto& filterer = runtime.GetFilterer();
    if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
        runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
        runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);

        // If filtering fails, resort to normal blitting
        glBlitFramebuffer(0, 0, rect_width, rect_height,
                          upload.texture_rect.left, upload.texture_rect.bottom,
                          upload.texture_rect.right, upload.texture_rect.top,
                          MakeBufferMask(type), GL_LINEAR);
    }*/
}
|
||||
|
||||
} // namespace Vulkan
|
143
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
143
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
@ -0,0 +1,143 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <span>
|
||||
#include <set>
|
||||
#include <vulkan/vulkan_hash.hpp>
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_cache/surface_base.h"
|
||||
#include "video_core/rasterizer_cache/types.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Describes a mapped region of a staging buffer used for pixel uploads/downloads
struct StagingData {
    vk::Buffer buffer;             ///< Staging buffer that owns the region
    u32 size = 0;                  ///< Size of the region in bytes
    std::span<std::byte> mapped{}; ///< CPU-visible view of the mapped region
    u32 buffer_offset = 0;         ///< Byte offset of the region inside the buffer
};
|
||||
|
||||
/// A Vulkan image together with its VMA allocation and default image view
struct ImageAlloc {
    vk::Image image;
    vk::ImageView image_view;
    VmaAllocation allocation;
    /// Last layout the image was transitioned to (see TextureRuntime::Transition)
    vk::ImageLayout layout = vk::ImageLayout::eUndefined;
    vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
    u32 levels = 1; ///< Number of mip levels in the image
};
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
class Surface;
|
||||
|
||||
/**
 * Provides texture manipulation functions to the rasterizer cache.
 * Separating this into a class makes it easier to abstract graphics API code.
 */
class TextureRuntime {
    friend class Surface;
public:
    TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
                   RenderpassCache& renderpass_cache);
    ~TextureRuntime();

    /// Maps an internal staging buffer of the provided size for pixel uploads/downloads
    [[nodiscard]] StagingData FindStaging(u32 size, bool upload);

    /// Allocates a vulkan image, possibly reusing an existing one from the recycler
    [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                                      VideoCore::TextureType type);

    /// Takes back ownership of the allocation for recycling
    void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);

    /// Performs required format conversions on the staging data
    void FormatConvert(VideoCore::PixelFormat format, bool upload,
                       std::span<std::byte> source, std::span<std::byte> dest);

    /// Transitions the mip level range of the surface to new_layout
    void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
                    vk::ImageLayout new_layout, u32 level, u32 level_count,
                    u32 layer = 0, u32 layer_count = 1);

    /// Fills the rectangle of the texture with the clear value provided
    bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                      VideoCore::ClearValue value);

    /// Copies a rectangle of src_tex to another rectangle of dst_rect
    bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);

    /// Blits a rectangle of src_tex to another rectangle of dst_rect
    bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);

    /// Generates mipmaps for all the available levels of the texture
    void GenerateMipmaps(Surface& surface, u32 max_level);

    /// Performs operations that need to be done on every scheduler slot switch
    void OnSlotSwitch(u32 new_slot);

private:
    /// Returns the current Vulkan instance
    const Instance& GetInstance() const {
        return instance;
    }

    /// Returns the current Vulkan scheduler
    TaskScheduler& GetScheduler() const {
        return scheduler;
    }

private:
    const Instance& instance;
    TaskScheduler& scheduler;
    RenderpassCache& renderpass_cache;
    // One staging buffer (and its write offset) per scheduler command slot
    std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
    std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
    // Images handed back via Recycle, keyed by their parameters for later reuse
    std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
    // Framebuffers cached by ClearTexture, keyed by the attached image view
    std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
};
|
||||
|
||||
/// A cached surface backed by a Vulkan image allocation
class Surface : public VideoCore::SurfaceBase<Surface> {
    friend class TextureRuntime;
    friend class RasterizerVulkan;
public:
    Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
    ~Surface() override;

    /// Uploads pixel data in staging to a rectangle region of the surface texture
    void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);

    /// Downloads pixel data to staging from a rectangle region of the surface texture
    void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);

private:
    /// Downloads scaled image by downscaling the requested rectangle
    void ScaledDownload(const VideoCore::BufferTextureCopy& download);

    /// Uploads pixel data to scaled texture
    void ScaledUpload(const VideoCore::BufferTextureCopy& upload);

    /// Overrides the image layout of the mip level range
    void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);

private:
    TextureRuntime& runtime;
    const Instance& instance;
    TaskScheduler& scheduler;

    ImageAlloc alloc{}; ///< Backing Vulkan image for this surface
    vk::Format internal_format = vk::Format::eUndefined;
};
|
||||
|
||||
/// Binds the Vulkan runtime and surface types to the generic rasterizer cache template
struct Traits {
    using RuntimeType = TextureRuntime;
    using SurfaceType = Surface;
};

using RasterizerCache = VideoCore::RasterizerCache<Traits>;
|
||||
|
||||
} // namespace Vulkan
|
97
src/video_core/shader/shader_cache.h
Normal file
97
src/video_core/shader/shader_cache.h
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <tuple>
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Pica::Shader {
|
||||
|
||||
/// Result of a cache lookup: the shader plus the generated source code when it
/// was newly compiled (std::nullopt on a cache hit).
template <typename ShaderType>
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;

/**
 * Caches compiled shader modules keyed directly by their config structure.
 * CodeGenerator produces the shader source for a config and ModuleCompiler
 * turns that source into a backend shader handle.
 */
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
          std::string(*CodeGenerator)(const KeyType&)>
class ShaderCache {
public:
    ShaderCache() = default;
    ~ShaderCache() = default;

    /// Returns a shader handle generated from the provided config,
    /// compiling (and caching) it on first use.
    template <typename... Args>
    auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult<ShaderType> {
        auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
        auto& shader = iter->second;

        if (new_shader) {
            std::string code = CodeGenerator(config);
            // Forward the extra compiler arguments instead of passing them as
            // lvalues (the original dropped the value category of args).
            shader = ModuleCompiler(code, std::forward<Args>(args)...);
            // Move the source into the result instead of copying it.
            return std::make_pair(shader, std::move(code));
        }

        return std::make_pair(shader, std::nullopt);
    }

    /// Inserts a precompiled shader into the cache under the given key.
    void Inject(const KeyType& key, ShaderType&& shader) {
        shaders.emplace(key, std::move(shader));
    }

public:
    std::unordered_map<KeyType, ShaderType> shaders;
};
|
||||
|
||||
/**
|
||||
* This is a cache designed for shaders translated from PICA shaders. The first cache matches the
|
||||
* config structure like a normal cache does. On cache miss, the second cache matches the generated
|
||||
* GLSL code. The configuration is like this because there might be leftover code in the PICA shader
|
||||
* program buffer from the previous shader, which is hashed into the config, resulting several
|
||||
* different config values from the same shader program.
|
||||
*/
|
||||
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
|
||||
std::optional<std::string>(*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)>
|
||||
class ShaderDoubleCache {
|
||||
public:
|
||||
ShaderDoubleCache() = default;
|
||||
~ShaderDoubleCache() = default;
|
||||
|
||||
template <typename... Args>
|
||||
auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) -> ShaderCacheResult<ShaderType> {
|
||||
if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
|
||||
auto code = CodeGenerator(setup, key);
|
||||
if (!code) {
|
||||
shader_map[key] = nullptr;
|
||||
return std::make_pair(ShaderType{}, std::nullopt);
|
||||
}
|
||||
|
||||
std::string& program = code.value();
|
||||
auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
|
||||
auto& shader = iter->second;
|
||||
|
||||
if (new_shader) {
|
||||
shader = ModuleCompiler(program, args...);
|
||||
}
|
||||
|
||||
shader_map[key] = &shader;
|
||||
return std::make_pair(shader, std::move(program));
|
||||
} else {
|
||||
return std::make_pair(*map_iter->second, std::nullopt);
|
||||
}
|
||||
}
|
||||
|
||||
void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
|
||||
const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
|
||||
|
||||
auto& cached_shader = iter->second;
|
||||
shader_map.insert_or_assign(key, &cached_shader);
|
||||
}
|
||||
|
||||
public:
|
||||
std::unordered_map<KeyType, ShaderType*> shader_map;
|
||||
std::unordered_map<std::string, ShaderType> shader_cache;
|
||||
};
|
||||
|
||||
} // namespace Pica::Shader
|
25
src/video_core/shader/shader_uniforms.cpp
Normal file
25
src/video_core/shader/shader_uniforms.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include "video_core/shader/shader.h"
|
||||
#include "video_core/shader/shader_uniforms.h"
|
||||
|
||||
namespace Pica::Shader {
|
||||
|
||||
// Fills the uniform arrays from the emulated PICA shader state: booleans are
// widened to 16-byte aligned ints, integer uniforms come from the shader
// registers, and float uniforms are converted to float32 via ToFloat32().
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup) {
    std::size_t bool_index = 0;
    for (const bool value : setup.uniforms.b) {
        bools[bool_index++] = BoolAligned{value ? 1 : 0};
    }

    std::size_t int_index = 0;
    for (const auto& value : regs.int_uniforms) {
        i[int_index++] = Common::Vec4u{value.x.Value(), value.y.Value(),
                                       value.z.Value(), value.w.Value()};
    }

    std::size_t float_index = 0;
    for (const auto& value : setup.uniforms.f) {
        f[float_index++] = Common::Vec4f{value.x.ToFloat32(), value.y.ToFloat32(),
                                         value.z.ToFloat32(), value.w.ToFloat32()};
    }
}
|
||||
|
||||
} // namespace Pica::Shader
|
99
src/video_core/shader/shader_uniforms.h
Normal file
99
src/video_core/shader/shader_uniforms.h
Normal file
@ -0,0 +1,99 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/regs_lighting.h"
|
||||
|
||||
namespace Pica {
|
||||
struct ShaderRegs;
|
||||
}
|
||||
|
||||
namespace Pica::Shader {
|
||||
|
||||
class ShaderSetup;
|
||||
|
||||
enum class UniformBindings : u32 { Common, VS, GS };
|
||||
|
||||
/// Per-light uniform data; vectors are 16-byte aligned to match the shader
/// uniform block layout (see the UniformData comment below).
struct LightSrc {
    alignas(16) Common::Vec3f specular_0;
    alignas(16) Common::Vec3f specular_1;
    alignas(16) Common::Vec3f diffuse;
    alignas(16) Common::Vec3f ambient;
    alignas(16) Common::Vec3f position;
    alignas(16) Common::Vec3f spot_direction; // negated
    float dist_atten_bias;
    float dist_atten_scale;
};
|
||||
|
||||
/**
 * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 * NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 * Not following that rule will cause problems on some AMD drivers.
 */
struct UniformData {
    int framebuffer_scale;
    int alphatest_ref;
    float depth_scale;
    float depth_offset;
    float shadow_bias_constant;
    float shadow_bias_linear;
    int scissor_x1;
    int scissor_y1;
    int scissor_x2;
    int scissor_y2;
    int fog_lut_offset;
    int proctex_noise_lut_offset;
    int proctex_color_map_offset;
    int proctex_alpha_map_offset;
    int proctex_lut_offset;
    int proctex_diff_lut_offset;
    float proctex_bias;
    int shadow_texture_bias;
    bool enable_clip1;
    alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4];
    alignas(16) Common::Vec3f fog_color;
    alignas(8) Common::Vec2f proctex_noise_f;
    alignas(8) Common::Vec2f proctex_noise_a;
    alignas(8) Common::Vec2f proctex_noise_p;
    alignas(16) Common::Vec3f lighting_global_ambient;
    LightSrc light_src[8];
    alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
    alignas(16) Common::Vec4f tev_combiner_buffer_color;
    alignas(16) Common::Vec4f clip_coef;
};

// Size is pinned so the C++ layout cannot silently drift from the shader-side block
static_assert(sizeof(UniformData) == 0x4F0,
              "The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
              "UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
/**
 * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
 * NOTE: the same rule from UniformData also applies here.
 */
struct PicaUniformsData {
    /// Fills bools/i/f from the emulated PICA register and shader setup state
    void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup);

    /// A bool widened to 16 bytes to satisfy the uniform block alignment rules
    struct BoolAligned {
        alignas(16) int b;
    };

    std::array<BoolAligned, 16> bools;          // Boolean uniforms b0-b15
    alignas(16) std::array<Common::Vec4u, 4> i; // Integer uniforms i0-i3
    alignas(16) std::array<Common::Vec4f, 96> f; // Float uniforms f0-f95
};
|
||||
|
||||
/// Uniform block layout for the vertex shader stage
struct VSUniformData {
    PicaUniformsData uniforms;
};
// Size is pinned so the C++ layout cannot silently drift from the shader-side block
static_assert(sizeof(VSUniformData) == 1856,
              "The size of the VSUniformData does not match the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
              "VSUniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
|
||||
} // namespace Pica::Shader
|
@ -227,14 +227,14 @@ void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> des
|
||||
for (std::size_t i = 0; i < source.size(); i += 3) {
|
||||
u32 bgr{};
|
||||
std::memcpy(&bgr, source.data() + i, 3);
|
||||
const u32 rgb = std::byteswap(bgr << 8);
|
||||
const u32 rgb = Common::swap32(bgr << 8);
|
||||
std::memcpy(dest.data(), &rgb, 3);
|
||||
}
|
||||
}
|
||||
|
||||
void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
|
||||
u32 j = 0;
|
||||
for (u32 i = 0; i < source.size(); i += 3) {
|
||||
for (std::size_t i = 0; i < source.size(); i += 3) {
|
||||
dest[j] = source[i + 2];
|
||||
dest[j + 1] = source[i + 1];
|
||||
dest[j + 2] = source[i];
|
||||
@ -246,7 +246,7 @@ void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> de
|
||||
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
|
||||
for (u32 i = 0; i < source.size(); i += 4) {
|
||||
const u32 abgr = *reinterpret_cast<const u32*>(source.data() + i);
|
||||
const u32 rgba = std::byteswap(abgr);
|
||||
const u32 rgba = Common::swap32(abgr);
|
||||
std::memcpy(dest.data() + i, &rgba, 4);
|
||||
}
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -44,15 +45,26 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondar
|
||||
g_memory = &memory;
|
||||
Pica::Init();
|
||||
|
||||
OpenGL::GLES = Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::OpenGLES;
|
||||
const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue();
|
||||
switch (graphics_api) {
|
||||
case Settings::GraphicsAPI::OpenGL:
|
||||
case Settings::GraphicsAPI::OpenGLES:
|
||||
OpenGL::GLES = graphics_api == Settings::GraphicsAPI::OpenGLES;
|
||||
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window, secondary_window);
|
||||
break;
|
||||
case Settings::GraphicsAPI::Vulkan:
|
||||
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window, secondary_window);
|
||||
ResultStatus result = g_renderer->Init();
|
||||
|
||||
if (result != ResultStatus::Success) {
|
||||
LOG_ERROR(Render, "initialization failed !");
|
||||
LOG_ERROR(Render, "Video core initialization failed");
|
||||
} else {
|
||||
LOG_DEBUG(Render, "initialized OK");
|
||||
LOG_INFO(Render, "Video core initialization OK");
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -3,8 +3,8 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <system_error>
|
||||
#include <jwt/jwt.hpp>
|
||||
#include "common/logging/log.h"
|
||||
#include <jwt/jwt.hpp>
|
||||
#include "common/web_result.h"
|
||||
#include "web_service/verify_user_jwt.h"
|
||||
#include "web_service/web_backend.h"
|
||||
|
Reference in New Issue
Block a user