renderer_vulkan: Add experimental Vulkan renderer

emufan4568
2022-09-16 17:19:09 +03:00
committed by GPUCode
parent 945faf8e92
commit 9675811bbe
70 changed files with 10347 additions and 161 deletions

.gitmodules vendored

@ -61,3 +61,9 @@
[submodule "vulkan-headers"]
path = externals/vulkan-headers
url = https://github.com/KhronosGroup/Vulkan-Headers
[submodule "glslang"]
path = externals/glslang
url = https://github.com/KhronosGroup/glslang
[submodule "glm"]
path = externals/glm
url = https://github.com/g-truc/glm


@ -9,6 +9,7 @@ cmake_policy(SET CMP0069 NEW)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
include(DownloadExternals)
include(GNUInstallDirs)
include(CMakeDependentOption)
project(citra LANGUAGES C CXX ASM)


@ -60,6 +60,16 @@ endif()
# Glad
add_subdirectory(glad)
# glslang
set(SKIP_GLSLANG_INSTALL ON)
set(ENABLE_GLSLANG_BINARIES OFF)
set(ENABLE_SPVREMAPPER OFF)
set(ENABLE_CTEST OFF)
add_subdirectory(glslang)
# glm
add_subdirectory(glm)
# inih
add_subdirectory(inih)

externals/glm vendored Submodule

Submodule externals/glm added at cc98465e35

externals/glslang vendored Submodule

Submodule externals/glslang added at c0cf8ad876


@ -122,6 +122,7 @@ else()
if (MINGW)
add_definitions(-DMINGW_HAS_SECURE_API)
add_compile_options("-Wa,-mbig-obj")
if (COMPILE_WITH_DWARF)
add_compile_options("-gdwarf")
endif()


@ -269,6 +269,10 @@ target_link_libraries(citra-qt PRIVATE audio_core common core input_common netwo
target_link_libraries(citra-qt PRIVATE Boost::boost glad nihstro-headers Qt5::Widgets Qt5::Multimedia)
target_link_libraries(citra-qt PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
if (NOT WIN32)
target_include_directories(citra-qt PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
endif()
target_compile_definitions(citra-qt PRIVATE
# Use QStringBuilder for string concatenation to reduce
# the overall number of temporary strings created.


@ -25,6 +25,10 @@
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
#if !defined(WIN32)
#include <qpa/qplatformnativeinterface.h>
#endif
EmuThread::EmuThread(Frontend::GraphicsContext& core_context) : core_context(core_context) {}
EmuThread::~EmuThread() = default;
@ -53,6 +57,7 @@ void EmuThread::run() {
});
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
emit HideLoadingScreen();
core_context.MakeCurrent();
@ -303,6 +308,40 @@ public:
}
};
static Frontend::WindowSystemType GetWindowSystemType() {
// Determine WSI type based on Qt platform.
QString platform_name = QGuiApplication::platformName();
if (platform_name == QStringLiteral("windows"))
return Frontend::WindowSystemType::Windows;
else if (platform_name == QStringLiteral("xcb"))
return Frontend::WindowSystemType::X11;
else if (platform_name == QStringLiteral("wayland"))
return Frontend::WindowSystemType::Wayland;
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
return Frontend::WindowSystemType::Windows;
}
static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
Frontend::EmuWindow::WindowSystemInfo wsi;
wsi.type = GetWindowSystemType();
// Our Win32 Qt external doesn't have the private API.
#if defined(WIN32) || defined(__APPLE__)
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#else
QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
wsi.display_connection = pni->nativeResourceForWindow("display", window);
if (wsi.type == Frontend::WindowSystemType::Wayland)
wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
else
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#endif
wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
return wsi;
}
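[Editor's note: for context, a minimal sketch of how a Vulkan backend can consume this WindowSystemInfo to create a rendering surface. The commit's actual implementation lives in vk_platform.cpp, which is not shown in this view; the helper below is hypothetical and assumes VULKAN_HPP_NO_CONSTRUCTORS plus the matching VK_USE_PLATFORM_* defines.]

#include <vulkan/vulkan.hpp>
#include "core/frontend/emu_window.h"

// Hypothetical sketch: translate WindowSystemInfo into a vk::SurfaceKHR.
vk::SurfaceKHR CreateSurfaceSketch(vk::Instance instance,
                                   const Frontend::EmuWindow::WindowSystemInfo& wsi) {
#if defined(VK_USE_PLATFORM_WIN32_KHR)
    if (wsi.type == Frontend::WindowSystemType::Windows) {
        return instance.createWin32SurfaceKHR(vk::Win32SurfaceCreateInfoKHR{
            .hinstance = GetModuleHandleW(nullptr),
            .hwnd = static_cast<HWND>(wsi.render_surface)});
    }
#endif
#if defined(VK_USE_PLATFORM_XLIB_KHR)
    if (wsi.type == Frontend::WindowSystemType::X11) {
        return instance.createXlibSurfaceKHR(vk::XlibSurfaceCreateInfoKHR{
            .dpy = static_cast<Display*>(wsi.display_connection),
            .window = reinterpret_cast<Window>(wsi.render_surface)});
    }
#endif
#if defined(VK_USE_PLATFORM_WAYLAND_KHR)
    if (wsi.type == Frontend::WindowSystemType::Wayland) {
        return instance.createWaylandSurfaceKHR(vk::WaylandSurfaceCreateInfoKHR{
            .display = static_cast<wl_display*>(wsi.display_connection),
            .surface = static_cast<wl_surface*>(wsi.render_surface)});
    }
#endif
    return {}; // Headless or unsupported platform: no surface.
}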
GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread, bool is_secondary_)
: QWidget(parent_), EmuWindow(is_secondary_), emu_thread(emu_thread) {
@ -539,6 +578,9 @@ bool GRenderWindow::InitRenderTarget() {
break;
}
// Update the Window System information with the new render target
window_info = GetWindowSystemInfo(child_widget->windowHandle());
child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight);
layout()->addWidget(child_widget);


@ -76,6 +76,7 @@ void ConfigureGraphics::SetConfiguration() {
ui->toggle_accurate_mul->setChecked(Settings::values.shaders_accurate_mul.GetValue());
ui->toggle_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new.GetValue());
ui->graphics_api_combo->setCurrentIndex(static_cast<int>(Settings::values.graphics_api.GetValue()));
if (Settings::IsConfiguringGlobal()) {
ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue());
@ -95,6 +96,7 @@ void ConfigureGraphics::ApplyConfiguration() {
ui->toggle_disk_shader_cache, use_disk_shader_cache);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync_new, ui->toggle_vsync_new,
use_vsync_new);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.graphics_api, ui->graphics_api_combo);
if (Settings::IsConfiguringGlobal()) {
Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked();


@ -6,6 +6,7 @@
#include <cstddef>
#include <cstring>
#include <concepts>
#include "common/cityhash.h"
#include "common/common_types.h"
@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) {
return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
template <std::integral T>
struct IdentityHash {
T operator()(const T& value) const {
return value;
}
};
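[Editor's note: a brief usage sketch of IdentityHash. When the key is already a high-quality 64-bit digest (e.g. a CityHash result from this same header), rehashing it inside std::unordered_map is wasted work, so the functor forwards the value unchanged. The cache below is hypothetical.]

#include <cstdint>
#include <string>
#include <unordered_map>
#include "common/hash.h" // Common::ComputeHash64, Common::IdentityHash (assumed available)

// Keys are CityHash digests, so the map can use them verbatim as bucket hashes.
std::unordered_map<std::uint64_t, std::string, Common::IdentityHash<std::uint64_t>> g_shader_cache;

void Remember(const std::string& source) {
    const std::uint64_t key = Common::ComputeHash64(source.data(), source.size());
    g_shader_cache.emplace(key, source); // no second hashing pass over the 8-byte key
}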
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
template <typename T>
struct HashableStruct {


@ -236,6 +236,7 @@ void DebuggerBackend::Write(const Entry& entry) {
CLS(Render) \
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \


@ -4,8 +4,8 @@
#pragma once
#include <type_traits>
#include <fmt/format.h>
#include <type_traits>
// adapted from https://github.com/fmtlib/fmt/issues/2704
// a generic formatter for enum classes


@ -8,6 +8,7 @@
#include <array>
#include "common/common_types.h"
#include "common/logging/formatter.h"
namespace Log {
// trims up to and including the last of ../, ..\, src/, src\ in a string
@ -103,6 +104,7 @@ enum class Class : ClassType {
Render, ///< Emulator video output and hardware acceleration
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE and LLE implementations of the DSP
Audio_Sink, ///< Emulator audio output backend


@ -5,6 +5,7 @@
#pragma once
#include <cstdlib>
#include <compare>
#include <type_traits>
namespace Common {


@ -446,7 +446,7 @@ struct Values {
Setting<bool> allow_plugin_loader{true, "allow_plugin_loader"};
// Renderer
SwitchableSetting<GraphicsAPI> graphics_api{GraphicsAPI::OpenGL, "graphics_api"};
SwitchableSetting<GraphicsAPI> graphics_api{GraphicsAPI::Vulkan, "graphics_api"};
SwitchableSetting<bool> use_hw_renderer{true, "use_hw_renderer"};
SwitchableSetting<bool> use_hw_shader{true, "use_hw_shader"};
SwitchableSetting<bool> separable_shader{false, "use_separable_shader"};


@ -14,6 +14,15 @@
namespace Frontend {
/// Information for the Graphics Backends signifying what type of screen pointer is in
/// WindowSystemInfo
enum class WindowSystemType : u8 {
Headless,
Windows,
X11,
Wayland,
};
struct Frame;
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
@ -122,6 +131,23 @@ public:
Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight};
};
/// Data describing host window system information
struct WindowSystemInfo {
// Window system type. Determines which GL context or Vulkan WSI is used.
WindowSystemType type = WindowSystemType::Headless;
// Connection to a display server. This is used on X11 and Wayland platforms.
void* display_connection = nullptr;
// Render surface. This is a pointer to the native window handle, which depends
// on the platform. e.g. HWND for Windows, Window for X11. If the surface is
// set to nullptr, the video backend will run in headless mode.
void* render_surface = nullptr;
// Scale of the render surface. For hidpi systems, this will be >1.
float render_surface_scale = 1.0f;
};
/// Polls window events
virtual void PollEvents() = 0;
@ -185,6 +211,13 @@ public:
config = val;
}
/**
* Returns system information about the drawing area.
*/
const WindowSystemInfo& GetWindowInfo() const {
return window_info;
}
/**
* Gets the framebuffer layout (width, height, and screen regions)
* @note This method is thread-safe
@ -233,6 +266,7 @@ protected:
}
bool is_secondary{};
WindowSystemInfo window_info;
private:
/**


@ -595,33 +595,6 @@ bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const {
return GetPhysicalRef(paddr);
}
PAddr MemorySystem::ClampPhysicalAddress(PAddr base, PAddr address) const {
struct MemoryArea {
PAddr paddr_base;
u32 size;
};
constexpr std::array memory_areas = {
MemoryArea{VRAM_PADDR, VRAM_SIZE},
MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE},
MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE},
MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
};
const auto area =
std::ranges::find_if(memory_areas, [&](const MemoryArea& area) {
return base >= area.paddr_base && base <= area.paddr_base + area.size;
});
if (area == memory_areas.end()) {
LOG_ERROR(HW_Memory, "Unknown base address used for clamping {:#08X} at PC {:#08X}", base,
Core::GetRunningCore().GetPC());
return address;
}
return std::clamp(address, area->paddr_base, area->paddr_base + area->size);
}
u8* MemorySystem::GetPointer(const VAddr vaddr) {
u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS];
if (page_pointer) {


@ -587,9 +587,6 @@ public:
/// Returns true if the address refers to a valid memory region
bool IsValidPhysicalAddress(PAddr paddr) const;
/// Clamps the address to the boundaries of the memory region pointed by base
PAddr ClampPhysicalAddress(PAddr base, PAddr address) const;
/// Gets offset in FCRAM from a pointer inside FCRAM range
u32 GetFCRAMOffset(const u8* pointer) const;


@ -85,11 +85,41 @@ add_library(video_core STATIC
#temporary, move these back in alphabetical order before merging
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_vulkan/pica_to_vk.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_platform.cpp
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader.cpp
renderer_vulkan/vk_shader.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_task_scheduler.cpp
renderer_vulkan/vk_task_scheduler.h
renderer_vulkan/vk_texture_runtime.cpp
renderer_vulkan/vk_texture_runtime.h
shader/debug_data.h
shader/shader.cpp
shader/shader.h
shader/shader_cache.h
shader/shader_interpreter.cpp
shader/shader_interpreter.h
shader/shader_uniforms.cpp
shader/shader_uniforms.h
swrasterizer/clipper.cpp
swrasterizer/clipper.h
swrasterizer/framebuffer.cpp
@ -160,8 +190,11 @@ endif()
create_target_directory_groups(video_core)
# Include Vulkan headers
target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include)
target_include_directories(video_core PRIVATE ../../externals/vma)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization)
target_link_libraries(video_core PRIVATE glad glm::glm SPIRV glslang nihstro-headers Boost::serialization)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if (ARCHITECTURE_x86_64)


@ -40,7 +40,7 @@ void Zero(T& o) {
State::State() : geometry_pipeline(*this) {
auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
using Pica::Shader::OutputVertex;
auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
const OutputVertex& v2) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};


@ -136,7 +136,7 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::sp
}
template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 height, u32 start_offset,
static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset,
std::span<std::byte> linear_buffer,
std::span<std::byte> tiled_buffer) {
@ -148,7 +148,6 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
// becomes zero for 4-bit textures!
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
const u32 end_offset = start_offset + static_cast<u32>(tiled_buffer.size());
// Does this line have any significance?
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
@ -216,7 +215,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset,
}
}
using MortonFunc = void (*)(u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
using MortonFunc = void (*)(u32, u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
MortonCopy<true, PixelFormat::RGBA8>, // 0


@ -6,6 +6,7 @@
#include <algorithm>
#include <unordered_map>
#include <optional>
#include <vector>
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/logging/log.h"
@ -46,9 +47,9 @@ class RasterizerAccelerated;
template <class T>
class RasterizerCache : NonCopyable {
public:
using TextureRuntime = typename T::Runtime;
using Surface = std::shared_ptr<typename T::Surface>;
using Watcher = SurfaceWatcher<typename T::Surface>;
using TextureRuntime = typename T::RuntimeType;
using Surface = std::shared_ptr<typename T::SurfaceType>;
using Watcher = SurfaceWatcher<typename T::SurfaceType>;
private:
/// Declare rasterizer interval types
@ -754,7 +755,7 @@ auto RasterizerCache<T>::GetFillSurface(const GPU::Regs::MemoryFillConfig& confi
params.type = SurfaceType::Fill;
params.res_scale = std::numeric_limits<u16>::max();
Surface new_surface = std::make_shared<typename T::Surface>(params, runtime);
Surface new_surface = std::make_shared<typename T::SurfaceType>(params, runtime);
std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4);
if (config.fill_32bit) {
@ -893,32 +894,23 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
ASSERT(load_start >= surface->addr && load_end <= surface->end);
const auto& staging = runtime.FindStaging(
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true);
surface->width * surface->height * 4, true);
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr);
if (!source_ptr) [[unlikely]] {
return;
}
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
const u32 start_offset = load_start - surface->addr;
const u32 upload_size = static_cast<u32>(upload_data.size());
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
if (!surface->is_tiled) {
ASSERT(surface->type == SurfaceType::Color);
const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size);
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer);
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer);
if (surface->is_tiled) {
std::vector<std::byte> unswizzled_data(staging.size);
UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr,
upload_data, unswizzled_data);
runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped);
} else {
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
}*/
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
} else {
UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped);
runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped);
}
const BufferTextureCopy upload = {
@ -939,7 +931,7 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
const auto& staging = runtime.FindStaging(
surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false);
surface->width * surface->height * 4, false);
const SurfaceParams params = surface->FromInterval(interval);
const BufferTextureCopy download = {
.buffer_offset = 0,
@ -956,25 +948,16 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
}
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
const u32 start_offset = flush_start - surface->addr;
const u32 download_size = static_cast<u32>(download_dest.size());
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
if (!surface->is_tiled) {
ASSERT(surface->type == SurfaceType::Color);
const auto download_data = staging.mapped.subspan(start_offset, download_size);
/*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) {
Pica::Texture::ConvertABGRToRGBA(download_data, download_dest);
} else if (surface->pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(download_data, download_dest);
if (surface->is_tiled) {
std::vector<std::byte> swizzled_data(staging.size);
runtime.FormatConvert(surface->pixel_format, false, swizzled_data, swizzled_data);
SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr,
staging.mapped, download_dest);
} else {
std::memcpy(download_dest.data(), download_data.data(), download_size);
}*/
std::memcpy(download_dest.data(), download_data.data(), download_size);
} else {
SwizzleTexture(*surface, start_offset, staging.mapped, download_dest);
runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest);
}
}
@ -1228,7 +1211,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, const Surface& r
template <class T>
auto RasterizerCache<T>::CreateSurface(SurfaceParams& params) -> Surface {
Surface surface = std::make_shared<typename T::Surface>(params, runtime);
Surface surface = std::make_shared<typename T::SurfaceType>(params, runtime);
surface->invalid_regions.insert(surface->GetInterval());
return surface;


@ -3,30 +3,26 @@
// Refer to the license.txt file included.
#pragma once
#include <glad/glad.h>
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/texture/texture_decode.h"
#include "video_core/rasterizer_cache/morton_swizzle.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/video_core.h"
namespace VideoCore {
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
const u32 func_index = static_cast<u32>(params.pixel_format);
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled);
SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled);
}
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
const u32 func_index = static_cast<u32>(params.pixel_format);
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled);
UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled);
}
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {
@ -68,4 +64,4 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
return result;
}
} // namespace OpenGL
} // namespace VideoCore


@ -14,7 +14,6 @@ struct HostTextureTag {
PixelFormat format{};
u32 width = 0;
u32 height = 0;
u32 levels = 1;
u32 layers = 1;
auto operator<=>(const HostTextureTag&) const noexcept = default;
@ -45,7 +44,7 @@ class SurfaceParams;
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
/**
@ -56,7 +55,7 @@ void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
* @param source_tiled The source morton swizzled data.
* @param dest_linear The output buffer where the generated linear data will be written to.
*/
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset,
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
} // namespace VideoCore


@ -9,10 +9,6 @@
#include "common/common_types.h"
#include "core/hw/gpu.h"
namespace OpenGL {
struct ScreenInfo;
}
namespace Pica::Shader {
struct OutputVertex;
} // namespace Pica::Shader
@ -73,13 +69,6 @@ public:
return false;
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
PAddr framebuffer_addr, u32 pixel_stride,
OpenGL::ScreenInfo& screen_info) {
return false;
}
/// Attempt to draw using hardware shaders
virtual bool AccelerateDrawBatch(bool is_indexed) {
return false;


@ -159,6 +159,7 @@ struct FramebufferRegs {
} stencil_test;
union {
u32 depth_color_mask;
BitField<0, 1, u32> depth_test_enable;
BitField<4, 3, CompareFunc> depth_test_func;
BitField<8, 1, u32> red_enable;


@ -6,8 +6,7 @@
#include <array>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
@ -18,7 +17,7 @@ struct RasterizerRegs {
KeepAll = 0,
KeepClockWise = 1,
KeepCounterClockWise = 2,
// TODO: What does the third value imply?
KeepAll2 = 3
};
union {


@ -21,7 +21,3 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
update_layout(*secondary_window);
}
}
void RendererBase::Sync() {
rasterizer->SyncEntireState();
}


@ -21,6 +21,9 @@ public:
/// Initialize the renderer
virtual VideoCore::ResultStatus Init() = 0;
/// Returns the rasterizer owned by the renderer
virtual VideoCore::RasterizerInterface* Rasterizer() = 0;
/// Shutdown the renderer
virtual void ShutDown() = 0;
@ -40,6 +43,8 @@ public:
/// Cleans up after video dumping is ended
virtual void CleanupVideoDumping() = 0;
virtual void Sync() = 0;
/// Updates the framebuffer layout of the contained render window handle.
void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {});
@ -54,10 +59,6 @@ public:
return m_current_frame;
}
VideoCore::RasterizerInterface* Rasterizer() const {
return rasterizer.get();
}
Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
@ -66,12 +67,9 @@ public:
return render_window;
}
void Sync();
protected:
Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle.
std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
};


@ -112,14 +112,15 @@ void Driver::ReportDriverInfo() {
}
void Driver::DeduceVendor() {
if (gpu_vendor.contains("NVIDIA")) {
if (gpu_vendor.find("NVIDIA") != gpu_vendor.npos) {
vendor = Vendor::Nvidia;
} else if (gpu_vendor.contains("ATI") ||
gpu_vendor.contains("Advanced Micro Devices")) {
} else if ((gpu_vendor.find("ATI") != gpu_vendor.npos) ||
(gpu_vendor.find("AMD") != gpu_vendor.npos) ||
(gpu_vendor.find("Advanced Micro Devices") != gpu_vendor.npos)) {
vendor = Vendor::AMD;
} else if (gpu_vendor.contains("Intel")) {
} else if (gpu_vendor.find("Intel") != gpu_vendor.npos) {
vendor = Vendor::Intel;
} else if (gpu_vendor.contains("GDI Generic")) {
} else if (gpu_vendor.find("GDI Generic") != gpu_vendor.npos) {
vendor = Vendor::Generic;
}
}


@ -243,17 +243,12 @@ private:
};
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
const std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
const std::string_view version{reinterpret_cast<const char*>(glGetString(GL_VERSION))};
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}


@ -20,6 +20,9 @@ class EmuWindow;
}
namespace OpenGL {
struct ScreenInfo;
class Driver;
class ShaderProgramManager;
@ -43,7 +46,7 @@ public:
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers


@ -41,10 +41,12 @@ struct LightSrc {
float dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
/**
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
* NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
* Not following that rule will cause problems on some AMD drivers.
*/
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
@ -81,8 +83,10 @@ static_assert(sizeof(UniformData) == 0x4F0,
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
/**
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
* NOTE: the same rule from UniformData also applies here.
*/
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);


@ -124,6 +124,17 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
return DEFAULT_TUPLE;
}
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest) {
if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
Pica::Texture::ConvertBGRToRGB(source, dest);
} else {
std::memcpy(dest.data(), source.data(), source.size());
}
}
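[Editor's note: the GLES branches exist because OpenGL ES lacks the BGR(A) upload formats desktop GL accepts, so RGBA8/RGB8 texels are reordered on the CPU instead. Illustratively, the RGBA8 case boils down to reversing each 32-bit pixel. This is a sketch of the idea, not the decoder's actual code.]

#include <algorithm>
#include <cstddef>
#include <span>

// Sketch: turn ABGR-packed 32-bit texels into RGBA by reversing each pixel's bytes.
void ConvertABGRToRGBASketch(std::span<const std::byte> source, std::span<std::byte> dest) {
    for (std::size_t i = 0; i + 4 <= source.size(); i += 4) {
        std::reverse_copy(source.begin() + i, source.begin() + i + 4, dest.begin() + i);
    }
}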
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
@ -302,9 +313,20 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
}
MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
Surface::~Surface() {
const VideoCore::HostTextureTag tag = {
.format = pixel_format,
.width = GetScaledWidth(),
.height = GetScaledHeight(),
.layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u
};
runtime.texture_recycler.emplace(tag, std::move(texture));
}
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
MICROPROFILE_SCOPE(OpenGL_Upload);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
@ -327,8 +349,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.GetWidth(),
upload.texture_rect.GetHeight(),
tuple.format, tuple.type,
reinterpret_cast<void*>(upload.buffer_offset));
tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
@ -339,9 +360,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
MICROPROFILE_SCOPE(OpenGL_Download);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
@ -361,7 +382,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
tuple.format, tuple.type, reinterpret_cast<void*>(download.buffer_offset));
tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@ -390,11 +411,9 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
rect_height, rect_width, 0);
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0);
}
}
@ -409,7 +428,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
tuple.format, tuple.type, 0);
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};


@ -70,6 +70,10 @@ public:
/// Returns the OpenGL format tuple associated with the provided pixel format
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Allocates an OpenGL texture with the specified dimensions and format
OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
@ -124,7 +128,7 @@ private:
class Surface : public VideoCore::SurfaceBase<Surface> {
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
~Surface() override = default;
~Surface() override;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
@ -148,8 +152,8 @@ public:
};
struct Traits {
using Runtime = TextureRuntime;
using Surface = Surface;
using RuntimeType = TextureRuntime;
using SurfaceType = Surface;
};
using RasterizerCache = VideoCore::RasterizerCache<Traits>;


@ -15,7 +15,6 @@
#include "core/memory.h"
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
@ -383,6 +382,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
return VideoCore::ResultStatus::Success;
}
VideoCore::RasterizerInterface* RendererOpenGL::Rasterizer() {
return rasterizer.get();
}
/// Shutdown the renderer
void RendererOpenGL::ShutDown() {}
@ -580,7 +583,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
// only allows rows to have a memory alignment of 4.
ASSERT(pixel_stride % 4 == 0);
if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr,
if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr,
static_cast<u32>(pixel_stride), screen_info)) {
// Reset the screen info's display texture to its own permanent texture
screen_info.display_texture = screen_info.texture.resource.handle;
@ -1214,4 +1217,8 @@ void RendererOpenGL::CleanupVideoDumping() {
mailbox->free_cv.notify_one();
}
void RendererOpenGL::Sync() {
rasterizer->SyncEntireState();
}
} // namespace OpenGL


@ -55,18 +55,16 @@ struct PresentationTexture {
OGLTexture texture;
};
class RasterizerOpenGL;
class RendererOpenGL : public RendererBase {
public:
explicit RendererOpenGL(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window);
~RendererOpenGL() override;
/// Initialize the renderer
VideoCore::ResultStatus Init() override;
/// Shutdown the renderer
VideoCore::RasterizerInterface* Rasterizer() override;
void ShutDown() override;
/// Finalizes rendering the guest frame
void SwapBuffers() override;
/// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this
@ -75,9 +73,8 @@ public:
/// Prepares for video dumping (e.g. create necessary buffers, etc)
void PrepareVideoDumping() override;
/// Cleans up after video dumping is ended
void CleanupVideoDumping() override;
void Sync() override;
private:
void InitOpenGLObjects();
@ -108,6 +105,7 @@ private:
private:
Driver driver;
OpenGLState state;
std::unique_ptr<RasterizerOpenGL> rasterizer;
// OpenGL object IDs
OGLVertexArray vertex_array;


@ -0,0 +1,278 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace PicaToVK {
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
struct FilterInfo {
vk::Filter mag_filter, min_filter;
vk::SamplerMipmapMode mip_mode;
};
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
constexpr std::array filter_table = {
vk::Filter::eNearest,
vk::Filter::eLinear
};
constexpr std::array mipmap_table = {
vk::SamplerMipmapMode::eNearest,
vk::SamplerMipmapMode::eLinear
};
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
}
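[Editor's note: a hedged sketch of where the combined FilterInfo would typically end up, namely feeding a vk::SamplerCreateInfo. It assumes VULKAN_HPP_NO_CONSTRUCTORS; the renderer's real sampler construction lives elsewhere in this commit.]

// Hypothetical glue code, not part of this header.
inline vk::SamplerCreateInfo MakeSamplerInfoSketch(TextureFilter mag, TextureFilter min,
                                                   TextureFilter mip) {
    const FilterInfo filters = TextureFilterMode(mag, min, mip);
    return vk::SamplerCreateInfo{
        .magFilter = filters.mag_filter,
        .minFilter = filters.min_filter,
        .mipmapMode = filters.mip_mode,
    };
}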
inline vk::Filter TextureFilterMode(TextureFilter mode) {
switch (mode) {
case TextureFilter::Linear:
return vk::Filter::eLinear;
case TextureFilter::Nearest:
return vk::Filter::eNearest;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode);
UNIMPLEMENTED();
}
return vk::Filter::eLinear;
}
inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) {
switch (mip) {
case TextureFilter::Linear:
return vk::SamplerMipmapMode::eLinear;
case TextureFilter::Nearest:
return vk::SamplerMipmapMode::eNearest;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip);
UNIMPLEMENTED();
}
return vk::SamplerMipmapMode::eLinear;
}
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
vk::SamplerAddressMode::eRepeat,
vk::SamplerAddressMode::eMirroredRepeat,
// TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
// comments in enum WrapMode.
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
vk::SamplerAddressMode::eRepeat,
vk::SamplerAddressMode::eRepeat,
}};
const auto index = static_cast<std::size_t>(mode);
// Range check table for input
if (index >= wrap_mode_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", index);
UNREACHABLE();
return vk::SamplerAddressMode::eClampToEdge;
}
if (index > 3) {
Core::System::GetInstance().TelemetrySession().AddField(
Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode",
static_cast<u32>(index));
LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index);
}
return wrap_mode_table[index];
}
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
vk::BlendOp::eAdd,
vk::BlendOp::eSubtract,
vk::BlendOp::eReverseSubtract,
vk::BlendOp::eMin,
vk::BlendOp::eMax,
}};
const auto index = static_cast<std::size_t>(equation);
// Range check table for input
if (index >= blend_equation_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", index);
// This return value is hwtested, not just a stub
return vk::BlendOp::eAdd;
}
return blend_equation_table[index];
}
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
vk::BlendFactor::eZero, // BlendFactor::Zero
vk::BlendFactor::eOne, // BlendFactor::One
vk::BlendFactor::eSrcColor, // BlendFactor::SourceColor
vk::BlendFactor::eOneMinusSrcColor, // BlendFactor::OneMinusSourceColor
vk::BlendFactor::eDstColor, // BlendFactor::DestColor
vk::BlendFactor::eOneMinusDstColor, // BlendFactor::OneMinusDestColor
vk::BlendFactor::eSrcAlpha, // BlendFactor::SourceAlpha
vk::BlendFactor::eOneMinusSrcAlpha, // BlendFactor::OneMinusSourceAlpha
vk::BlendFactor::eDstAlpha, // BlendFactor::DestAlpha
vk::BlendFactor::eOneMinusDstAlpha, // BlendFactor::OneMinusDestAlpha
vk::BlendFactor::eConstantColor, // BlendFactor::ConstantColor
vk::BlendFactor::eOneMinusConstantColor,// BlendFactor::OneMinusConstantColor
vk::BlendFactor::eConstantAlpha, // BlendFactor::ConstantAlpha
vk::BlendFactor::eOneMinusConstantAlpha,// BlendFactor::OneMinusConstantAlpha
vk::BlendFactor::eSrcAlphaSaturate, // BlendFactor::SourceAlphaSaturate
}};
const auto index = static_cast<std::size_t>(factor);
// Range check table for input
if (index >= blend_func_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", index);
UNREACHABLE();
return vk::BlendFactor::eOne;
}
return blend_func_table[index];
}
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
vk::LogicOp::eClear, // Clear
vk::LogicOp::eAnd, // And
vk::LogicOp::eAndReverse, // AndReverse
vk::LogicOp::eCopy, // Copy
vk::LogicOp::eSet, // Set
vk::LogicOp::eCopyInverted, // CopyInverted
vk::LogicOp::eNoOp, // NoOp
vk::LogicOp::eInvert, // Invert
vk::LogicOp::eNand, // Nand
vk::LogicOp::eOr, // Or
vk::LogicOp::eNor, // Nor
vk::LogicOp::eXor, // Xor
vk::LogicOp::eEquivalent, // Equiv
vk::LogicOp::eAndInverted, // AndInverted
vk::LogicOp::eOrReverse, // OrReverse
vk::LogicOp::eOrInverted, // OrInverted
}};
const auto index = static_cast<std::size_t>(op);
// Range check table for input
if (index >= logic_op_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", index);
UNREACHABLE();
return vk::LogicOp::eCopy;
}
return logic_op_table[index];
}
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
vk::CompareOp::eNever, // CompareFunc::Never
vk::CompareOp::eAlways, // CompareFunc::Always
vk::CompareOp::eEqual, // CompareFunc::Equal
vk::CompareOp::eNotEqual, // CompareFunc::NotEqual
vk::CompareOp::eLess, // CompareFunc::LessThan
vk::CompareOp::eLessOrEqual, // CompareFunc::LessThanOrEqual
vk::CompareOp::eGreater, // CompareFunc::GreaterThan
vk::CompareOp::eGreaterOrEqual, // CompareFunc::GreaterThanOrEqual
}};
const auto index = static_cast<std::size_t>(func);
// Range check table for input
if (index >= compare_func_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", index);
UNREACHABLE();
return vk::CompareOp::eAlways;
}
return compare_func_table[index];
}
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
vk::StencilOp::eKeep, // StencilAction::Keep
vk::StencilOp::eZero, // StencilAction::Zero
vk::StencilOp::eReplace, // StencilAction::Replace
vk::StencilOp::eIncrementAndClamp, // StencilAction::Increment
vk::StencilOp::eDecrementAndClamp, // StencilAction::Decrement
vk::StencilOp::eInvert, // StencilAction::Invert
vk::StencilOp::eIncrementAndWrap, // StencilAction::IncrementWrap
vk::StencilOp::eDecrementAndWrap, // StencilAction::DecrementWrap
}};
const auto index = static_cast<std::size_t>(action);
// Range check table for input
if (index >= stencil_op_table.size()) {
LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", index);
UNREACHABLE();
return vk::StencilOp::eKeep;
}
return stencil_op_table[index];
}
inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) {
switch (topology) {
case Pica::PipelineRegs::TriangleTopology::Fan:
return vk::PrimitiveTopology::eTriangleFan;
case Pica::PipelineRegs::TriangleTopology::List:
case Pica::PipelineRegs::TriangleTopology::Shader:
return vk::PrimitiveTopology::eTriangleList;
case Pica::PipelineRegs::TriangleTopology::Strip:
return vk::PrimitiveTopology::eTriangleStrip;
}
}
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
switch (mode) {
case Pica::RasterizerRegs::CullMode::KeepAll:
case Pica::RasterizerRegs::CullMode::KeepAll2:
return vk::CullModeFlagBits::eNone;
case Pica::RasterizerRegs::CullMode::KeepClockWise:
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::CullModeFlagBits::eBack;
}
}
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
switch (mode) {
case Pica::RasterizerRegs::CullMode::KeepAll:
case Pica::RasterizerRegs::CullMode::KeepAll2:
case Pica::RasterizerRegs::CullMode::KeepClockWise:
return vk::FrontFace::eCounterClockwise;
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::FrontFace::eClockwise;
}
}
inline Common::Vec4f ColorRGBA8(const u32 color) {
const auto rgba =
Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF};
return rgba / 255.0f;
}
inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
}
} // namespace PicaToVK
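[Editor's note: every translator above follows the same defensive pattern: cast the PICA enum to an index, range-check it against the lookup table, and log with a safe fallback on out-of-range input. Below is a hedged sketch of intended use; the PICA-side field names follow the existing OpenGL rasterizer, and the real consumer is vk_pipeline_cache, not shown in this view.]

// Hypothetical usage, assuming VULKAN_HPP_NO_CONSTRUCTORS.
inline vk::PipelineColorBlendAttachmentState MakeBlendStateSketch(
    const Pica::FramebufferRegs& framebuffer) {
    const auto& blend = framebuffer.output_merger.alpha_blending;
    return vk::PipelineColorBlendAttachmentState{
        .blendEnable = VK_TRUE,
        .srcColorBlendFactor = PicaToVK::BlendFunc(blend.factor_source_rgb),
        .dstColorBlendFactor = PicaToVK::BlendFunc(blend.factor_dest_rgb),
        .colorBlendOp = PicaToVK::BlendEquation(blend.blend_equation_rgb),
        .srcAlphaBlendFactor = PicaToVK::BlendFunc(blend.factor_source_a),
        .dstAlphaBlendFactor = PicaToVK::BlendFunc(blend.factor_dest_a),
        .alphaBlendOp = PicaToVK::BlendEquation(blend.blend_equation_a),
    };
}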

File diff suppressed because it is too large.


@ -0,0 +1,126 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <glm/glm.hpp>
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
/// Structure used for storing information about the textures for each 3DS screen
struct TextureInfo {
ImageAlloc alloc;
u32 width;
u32 height;
GPU::Regs::PixelFormat format;
};
/// Structure used for storing information about the display target for each 3DS screen
struct ScreenInfo {
ImageAlloc* display_texture = nullptr;
Common::Rectangle<float> display_texcoords;
TextureInfo texture;
vk::Sampler sampler;
};
// Uniform data used for presenting the 3DS screens
struct PresentUniformData {
glm::mat4 modelview;
Common::Vec4f i_resolution;
Common::Vec4f o_resolution;
int screen_id_l = 0;
int screen_id_r = 0;
int layer = 0;
int reverse_interlaced = 0;
// Returns an immutable byte view of the uniform data
auto AsBytes() const {
return std::as_bytes(std::span{this, 1});
}
};
static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!");
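[Editor's note: the 256-byte ceiling suggests this struct is meant to fit a small per-draw constant slot. Whether the commit ultimately routes it through push constants or a uniform buffer is not visible in this view; below is a push-constant sketch under that assumption.]

// Hypothetical: the pipeline layout is assumed to declare a matching
// vertex+fragment push-constant range of sizeof(PresentUniformData).
void PushPresentDataSketch(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                           const PresentUniformData& draw_info) {
    cmdbuf.pushConstants(layout,
                         vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
                         0, sizeof(PresentUniformData), &draw_info);
}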
constexpr u32 PRESENT_PIPELINES = 3;
class RasterizerVulkan;
class RendererVulkan : public RendererBase {
public:
RendererVulkan(Frontend::EmuWindow& window);
~RendererVulkan() override;
VideoCore::ResultStatus Init() override;
VideoCore::RasterizerInterface* Rasterizer() override;
void ShutDown() override;
void SwapBuffers() override;
void TryPresent(int timeout_ms) override {}
void PrepareVideoDumping() override {}
void CleanupVideoDumping() override {}
void Sync() override;
private:
void ReloadSampler();
void ReloadPipeline();
void CompileShaders();
void BuildLayouts();
void BuildPipelines();
void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer);
void ConfigureRenderPipeline();
void PrepareRendertarget();
void BeginRendering();
void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped);
void DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h);
void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h);
void DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);
void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h);
void UpdateFramerate();
/// Loads framebuffer from emulated memory into the display information structure
void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
ScreenInfo& screen_info, bool right_eye);
private:
Instance instance;
TaskScheduler scheduler;
RenderpassCache renderpass_cache;
TextureRuntime runtime;
Swapchain swapchain;
std::unique_ptr<RasterizerVulkan> rasterizer;
StreamBuffer vertex_buffer;
// Present pipelines (Normal, Anaglyph, Interlaced)
vk::PipelineLayout present_pipeline_layout;
vk::DescriptorSetLayout present_descriptor_layout;
vk::DescriptorUpdateTemplate present_update_template;
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
std::array<vk::Sampler, 2> present_samplers;
vk::ShaderModule present_vertex_shader;
u32 current_pipeline = 0;
u32 current_sampler = 0;
/// Display information for top and bottom screens respectively
std::array<ScreenInfo, 3> screen_infos{};
PresentUniformData draw_info{};
vk::ClearColorValue clear_color{};
};
} // namespace Vulkan


@ -0,0 +1,9 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VMA_IMPLEMENTATION
#include "video_core/renderer_vulkan/vk_common.h"
// Store the dispatch loader here
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE


@ -0,0 +1,72 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
// Include vulkan-hpp header
#define VK_NO_PROTOTYPES 1
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#include <vulkan/vulkan.hpp>
// Include Vulkan memory allocator
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
#include <vk_mem_alloc.h>
namespace Vulkan {
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
/// Returns the image aspect associated with the provided format
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32SfloatS8Uint:
return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
return vk::ImageAspectFlagBits::eDepth;
default:
return vk::ImageAspectFlagBits::eColor;
}
}
/// Returns a bit mask with the required usage of a format with a particular aspect
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::ImageUsageFlagBits::eStorage |
vk::ImageUsageFlagBits::eColorAttachment;
}
}
/// Returns a bit mask with the required features of a format with a particular aspect
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::FormatFeatureFlagBits::eStorageImage |
vk::FormatFeatureFlagBits::eColorAttachment;
}
}
} // namespace Vulkan
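[Editor's note: since all three helpers are constexpr, a format's aspect, usage and required features can be derived at compile time. A small usage sketch follows.]

#include "video_core/renderer_vulkan/vk_common.h"

// D24S8 resolves to a depth+stencil aspect, which in turn selects
// depth-stencil attachment usage and the matching format features.
constexpr vk::Format format = vk::Format::eD24UnormS8Uint;
constexpr vk::ImageAspectFlags aspect = Vulkan::GetImageAspect(format);
constexpr vk::ImageUsageFlags usage = Vulkan::GetImageUsage(aspect);
constexpr vk::FormatFeatureFlags features = Vulkan::GetFormatFeatures(aspect);
// `features` would then be validated with Instance::IsFormatSupported(format, features).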


@ -0,0 +1,268 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <span>
#include "common/assert.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
Instance::Instance(Frontend::EmuWindow& window) {
auto window_info = window.GetWindowInfo();
// Fetch instance independent function pointers
vk::DynamicLoader dl;
auto vkGetInstanceProcAddr = dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
// Enable the instance extensions the backend uses
auto extensions = GetInstanceExtensions(window_info.type, true);
// We require a Vulkan 1.1 driver
const u32 available_version = vk::enumerateInstanceVersion();
if (available_version < VK_API_VERSION_1_1) {
LOG_CRITICAL(Render_Vulkan, "Vulkan 1.1 is required, but the driver only supports 1.0!");
}
const vk::ApplicationInfo application_info = {
.pApplicationName = "Citra",
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
.pEngineName = "Citra Vulkan",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version
};
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const vk::InstanceCreateInfo instance_info = {
.pApplicationInfo = &application_info,
.enabledLayerCount = static_cast<u32>(layers.size()),
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data()
};
instance = vk::createInstance(instance_info);
surface = CreateSurface(instance, window);
// TODO: GPU select dialog
auto physical_devices = instance.enumeratePhysicalDevices();
physical_device = physical_devices[0]; // Default to the first adapter until a GPU select dialog exists
device_properties = physical_device.getProperties();
CreateDevice();
}
Instance::~Instance() {
device.waitIdle();
vmaDestroyAllocator(allocator);
device.destroy();
instance.destroySurfaceKHR(surface);
instance.destroy();
}
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
if (auto it = supported.find(format); it != supported.end()) {
return (it->second.optimalTilingFeatures & usage) == usage;
}
// Cache format properties so we don't have to query the driver all the time
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
supported.insert(std::make_pair(format, properties));
return (properties.optimalTilingFeatures & usage) == usage;
}
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
if (format == vk::Format::eUndefined) {
return format;
}
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
if (IsFormatSupported(format, features)) {
return format;
}
// Return the most widely supported alternative format, preferably one with the
// same block size, per the Vulkan spec's required-format-support guarantees.
// See section 43.3 "Required Format Support" of the Vulkan spec.
switch (format) {
case vk::Format::eD24UnormS8Uint:
return vk::Format::eD32SfloatS8Uint;
case vk::Format::eX8D24UnormPack32:
return vk::Format::eD32Sfloat;
case vk::Format::eR5G5B5A1UnormPack16:
return vk::Format::eA1R5G5B5UnormPack16;
case vk::Format::eR8G8B8Unorm:
return vk::Format::eR8G8B8A8Unorm;
case vk::Format::eUndefined:
return vk::Format::eUndefined;
case vk::Format::eR4G4B4A4UnormPack16:
// B4G4R4A4 is not guaranteed by the spec to support attachments
return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
default:
LOG_WARNING(Render_Vulkan, "Format {} doesn't support attachments, falling back to RGBA8",
vk::to_string(format));
return vk::Format::eR8G8B8A8Unorm;
}
}
bool Instance::CreateDevice() {
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>();
// Not having geometry shaders will cause issues with accelerated rendering.
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
if (!available.geometryShader) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
}
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
if (extension_list.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// Helper lambda for adding extensions
std::array<const char*, 6> enabled_extensions;
u32 enabled_extension_count = 0;
auto AddExtension = [&](std::string_view name) -> bool {
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
return name.compare(prop.extensionName.data()) == 0; // compare() returns 0 on equality
});
if (result != extension_list.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
enabled_extensions[enabled_extension_count++] = name.data();
return true;
}
LOG_WARNING(Render_Vulkan, "Extension {} unavailable.", name);
return false;
};
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
// Search queue families for graphics and present queues
auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
return false;
}
bool graphics_queue_found = false;
bool present_queue_found = false;
for (std::size_t i = 0; i < family_properties.size(); i++) {
// Check if queue supports graphics
const u32 index = static_cast<u32>(i);
if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
graphics_queue_family_index = index;
graphics_queue_found = true;
// If this queue also supports presentation we are finished
if (physical_device.getSurfaceSupportKHR(index, surface)) {
present_queue_family_index = index;
present_queue_found = true;
break;
}
}
// Check if queue supports presentation
if (physical_device.getSurfaceSupportKHR(index, surface)) {
present_queue_family_index = index;
present_queue_found = true;
}
}
if (!graphics_queue_found || !present_queue_found) {
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
return false;
}
static constexpr float queue_priorities[] = {1.0f};
const std::array queue_infos = {
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = graphics_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
},
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = present_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
}
};
const u32 queue_count = graphics_queue_family_index != present_queue_family_index ? 2u : 1u;
const vk::StructureChain device_chain = {
vk::DeviceCreateInfo{
.queueCreateInfoCount = queue_count,
.pQueueCreateInfos = queue_infos.data(),
.enabledExtensionCount = enabled_extension_count,
.ppEnabledExtensionNames = enabled_extensions.data(),
},
vk::PhysicalDeviceFeatures2{
.features = {
.robustBufferAccess = available.robustBufferAccess,
.geometryShader = available.geometryShader,
.dualSrcBlend = available.dualSrcBlend,
.logicOp = available.logicOp,
.depthClamp = available.depthClamp,
.largePoints = available.largePoints,
.samplerAnisotropy = available.samplerAnisotropy,
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
.shaderClipDistance = available.shaderClipDistance
}
},
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true
},
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>()
};
// Create logical device
device = physical_device.createDevice(device_chain.get());
VULKAN_HPP_DEFAULT_DISPATCHER.init(device);
// Grab the graphics and present queues.
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
present_queue = device.getQueue(present_queue_family_index, 0);
CreateAllocator();
return true;
}
void Instance::CreateAllocator() {
const VmaVulkanFunctions functions = {
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
};
const VmaAllocatorCreateInfo allocator_info = {
.physicalDevice = physical_device,
.device = device,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1
};
if (VkResult result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result);
UNREACHABLE();
}
}
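// Illustrative sketch, not part of this commit: with the allocator in place,
// buffer memory is requested through VMA rather than raw vkAllocateMemory calls.
// The size, usage flags and VMA 3.x allocation flags below are assumptions.
static void AllocateExampleStagingBuffer(VmaAllocator allocator) {
    const VkBufferCreateInfo buffer_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = 64 * 1024,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    const VmaAllocationCreateInfo alloc_info = {
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO,
    };
    VkBuffer buffer = VK_NULL_HANDLE;
    VmaAllocation allocation = VK_NULL_HANDLE;
    if (vmaCreateBuffer(allocator, &buffer_info, &alloc_info, &buffer, &allocation,
                        nullptr) != VK_SUCCESS) {
        LOG_ERROR(Render_Vulkan, "Example staging buffer allocation failed");
        return;
    }
    vmaDestroyBuffer(allocator, buffer, allocation);
}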
} // namespace Vulkan

View File

@ -0,0 +1,129 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
/// The global Vulkan instance
class Instance {
public:
Instance(Frontend::EmuWindow& window);
~Instance();
/// Returns true when the format supports the provided feature flags
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
/// Returns the most compatible alternative when the provided format is unsupported
vk::Format GetFormatAlternative(vk::Format format) const;
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return instance;
}
/// Returns the Vulkan surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the current physical device
vk::PhysicalDevice GetPhysicalDevice() const {
return physical_device;
}
/// Returns the Vulkan device
vk::Device GetDevice() const {
return device;
}
VmaAllocator GetAllocator() const {
return allocator;
}
/// Retrieve queue information
u32 GetGraphicsQueueFamilyIndex() const {
return graphics_queue_family_index;
}
u32 GetPresentQueueFamilyIndex() const {
return present_queue_family_index;
}
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Returns true when VK_KHR_timeline_semaphore is supported
bool IsTimelineSemaphoreSupported() const {
return timeline_semaphores;
}
/// Returns true when VK_EXT_extended_dynamic_state is supported
bool IsExtendedDynamicStateSupported() const {
return extended_dynamic_state;
}
/// Returns true when VK_KHR_push_descriptor is supported
bool IsPushDescriptorsSupported() const {
return push_descriptors;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return device_properties.vendorID;
}
/// Returns the device ID of the physical device
u32 GetDeviceID() const {
return device_properties.deviceID;
}
/// Returns the pipeline cache unique identifier
auto GetPipelineCacheUUID() const {
return device_properties.pipelineCacheUUID;
}
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return device_properties.limits.minUniformBufferOffsetAlignment;
}
private:
/// Creates the logical device, opportunistically enabling extensions
bool CreateDevice();
/// Creates the VMA allocator handle
void CreateAllocator();
private:
vk::Device device;
vk::PhysicalDevice physical_device;
vk::Instance instance;
vk::SurfaceKHR surface;
vk::PhysicalDeviceProperties device_properties;
VmaAllocator allocator;
vk::Queue present_queue;
vk::Queue graphics_queue;
u32 present_queue_family_index = 0;
u32 graphics_queue_family_index = 0;
bool timeline_semaphores = false;
bool extended_dynamic_state = false;
bool push_descriptors = false;
};
} // namespace Vulkan

View File

@ -0,0 +1,714 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <filesystem>
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
struct Bindings {
std::array<vk::DescriptorType, MAX_DESCRIPTORS> bindings;
u32 binding_count;
};
constexpr u32 RASTERIZER_SET_COUNT = 4;
constexpr static std::array RASTERIZER_SETS = {
Bindings{
// Utility set
.bindings = {
vk::DescriptorType::eUniformBuffer,
vk::DescriptorType::eUniformBuffer,
vk::DescriptorType::eUniformTexelBuffer,
vk::DescriptorType::eUniformTexelBuffer,
vk::DescriptorType::eUniformTexelBuffer
},
.binding_count = 5
},
Bindings{
// Texture set
.bindings = {
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage,
vk::DescriptorType::eSampledImage
},
.binding_count = 4
},
Bindings{
// Sampler set
.bindings = {
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler,
vk::DescriptorType::eSampler
},
.binding_count = 4
},
Bindings {
// Shadow set
.bindings = {
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage,
vk::DescriptorType::eStorageImage
},
.binding_count = 7
}
};
constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
vk::ShaderStageFlags flags;
switch (type) {
case vk::DescriptorType::eSampler:
case vk::DescriptorType::eSampledImage:
case vk::DescriptorType::eUniformTexelBuffer:
case vk::DescriptorType::eStorageImage:
flags = vk::ShaderStageFlagBits::eFragment;
break;
case vk::DescriptorType::eUniformBuffer:
case vk::DescriptorType::eUniformBufferDynamic:
flags = vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eCompute;
break;
default:
LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
}
return flags;
}
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
return sizeof(float) * attrib.size;
case AttribType::Int:
return sizeof(u32) * attrib.size;
case AttribType::Short:
return sizeof(u16) * attrib.size;
case AttribType::Byte:
case AttribType::Ubyte:
return sizeof(u8) * attrib.size;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!");
UNREACHABLE();
}
return 0;
}
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
switch (attrib.size) {
case 1: return vk::Format::eR32Sfloat;
case 2: return vk::Format::eR32G32Sfloat;
case 3: return vk::Format::eR32G32B32Sfloat;
case 4: return vk::Format::eR32G32B32A32Sfloat;
}
default:
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
UNREACHABLE();
}
return vk::Format::eR32Sfloat;
}
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
switch (index) {
case 0: return vk::ShaderStageFlagBits::eVertex;
case 1: return vk::ShaderStageFlagBits::eFragment;
case 2: return vk::ShaderStageFlagBits::eGeometry;
default:
LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
UNREACHABLE();
}
return vk::ShaderStageFlagBits::eVertex;
}
PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
descriptor_dirty.fill(true);
LoadDiskCache();
BuildLayout();
trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
}
PipelineCache::~PipelineCache() {
vk::Device device = instance.GetDevice();
SaveDiskCache();
device.destroyPipelineLayout(layout);
device.destroyPipelineCache(pipeline_cache);
device.destroyShaderModule(trivial_vertex_shader);
for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
device.destroyDescriptorSetLayout(descriptor_set_layouts[i]);
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
for (auto& [key, module] : programmable_vertex_shaders.shader_cache) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fixed_geometry_shaders.shaders) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fragment_shaders.shaders) {
device.destroyShaderModule(module);
}
for (const auto& [hash, pipeline] : graphics_pipelines) {
device.destroyPipeline(pipeline);
}
graphics_pipelines.clear();
}
void PipelineCache::BindPipeline(const PipelineInfo& info) {
ApplyDynamic(info);
// When texture downloads occur, the runtime will flush the GPU and switch
// scheduler slots behind our back. This can invalidate cached descriptor
// sets and requires the pipeline to be rebound.
if (timestamp != scheduler.GetHostFenceCounter()) {
MarkDirty();
}
u64 shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
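// With extended dynamic state, rasterization and depth/stencil state are set
// dynamically and can be omitted from the hash. Otherwise hash everything up to
// stencil_reference, which is always dynamic state.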
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ?
offsetof(PipelineInfo, rasterization) :
offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
if (new_pipeline) {
it->second = BuildPipeline(info);
}
if (it->second != current_pipeline) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
current_pipeline = it->second;
}
BindDescriptorSets();
}
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) {
const PicaVSConfig config{regs.vs, setup};
auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
if (!handle) {
return false;
}
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = config.Hash();
return true;
}
void PipelineCache::UseTrivialVertexShader() {
current_shaders[ProgramType::VS] = trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
}
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
const PicaFixedGSConfig gs_config{regs};
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
}
void PipelineCache::UseTrivialGeometryShader() {
current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
shader_hashes[ProgramType::GS] = 0;
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal
};
SetBinding(1, binding, DescriptorData{image_info});
}
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral
};
SetBinding(3, binding, DescriptorData{image_info});
}
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
const DescriptorData data = {
.buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size
}
};
SetBinding(0, binding, data);
}
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
const DescriptorData data = {
.buffer_view = buffer_view
};
SetBinding(0, binding, data);
}
void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.sampler = sampler
}
};
SetBinding(2, binding, data);
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f});
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}});
}
void PipelineCache::MarkDirty() {
descriptor_dirty.fill(true);
current_pipeline = VK_NULL_HANDLE;
timestamp = scheduler.GetHostFenceCounter();
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
if (instance.IsExtendedDynamicStateSupported()) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology));
}
}
void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) {
if (update_data[set][binding] != data) {
update_data[set][binding] = data;
descriptor_dirty[set] = true;
}
}
void PipelineCache::BuildLayout() {
std::array<vk::DescriptorSetLayoutBinding, MAX_DESCRIPTORS> set_bindings;
std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
const auto& set = RASTERIZER_SETS[i];
for (u32 j = 0; j < set.binding_count; j++) {
vk::DescriptorType type = set.bindings[j];
set_bindings[j] = vk::DescriptorSetLayoutBinding{
.binding = j,
.descriptorType = type,
.descriptorCount = 1,
.stageFlags = ToVkStageFlags(type)
};
update_entries[j] = vk::DescriptorUpdateTemplateEntry{
.dstBinding = j,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = type,
.offset = j * sizeof(DescriptorData),
.stride = 0
};
}
const vk::DescriptorSetLayoutCreateInfo layout_info = {
.bindingCount = set.binding_count,
.pBindings = set_bindings.data()
};
// Create descriptor set layout
descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info);
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = set.binding_count,
.pDescriptorUpdateEntries = update_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_set_layouts[i]
};
// Create descriptor set update template
update_templates[i] = device.createDescriptorUpdateTemplate(template_info);
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = RASTERIZER_SET_COUNT,
.pSetLayouts = descriptor_set_layouts.data(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr
};
layout = device.createPipelineLayout(layout_info);
}
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
vk::Device device = instance.GetDevice();
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (std::size_t i = 0; i < current_shaders.size(); i++) {
vk::ShaderModule shader = current_shaders[i];
if (!shader) {
continue;
}
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(i),
.module = shader,
.pName = "main"
};
}
/**
* Vulkan has no intuitive support for fixed attributes. To avoid duplicating the vertex
* data and inflating uploads, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate
* when the fixed flag is set. Since we only ever render a single instance, the shader
* will always read the same, single attribute value.
*/
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
const auto& binding = info.vertex_layout.bindings[i];
bindings[i] = vk::VertexInputBindingDescription{
.binding = binding.binding,
.stride = binding.stride,
.inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
: vk::VertexInputRate::eVertex
};
}
// Populate vertex attribute structures
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
const auto& attr = info.vertex_layout.attributes[i];
attributes[i] = vk::VertexInputAttributeDescription{
.location = attr.location,
.binding = attr.binding,
.format = ToVkAttributeFormat(attr),
.offset = attr.offset
};
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
.pVertexAttributeDescriptions = attributes.data()
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
.primitiveRestartEnable = false
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
.frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
.depthBiasEnable = false,
.lineWidth = 1.0f
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = info.blending.blend_enable.Value(),
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = info.blending.logic_op_enable.Value(),
.logicOp = PicaToVK::LogicOp(info.blending.logic_op),
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}
};
const vk::Viewport viewport = {
.x = 0.0f,
.y = 0.0f,
.width = 1.0f,
.height = 1.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f
};
const vk::Rect2D scissor = {
.offset = {0, 0},
.extent = {1, 1}
};
vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {
.negativeOneToOne = true
};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.pNext = &depth_clip_control,
.viewportCount = 1,
.pViewports = &viewport,
.scissorCount = 1,
.pScissors = &scissor,
};
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
const std::array dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference,
vk::DynamicState::eBlendConstants,
// VK_EXT_extended_dynamic_state
vk::DynamicState::eCullModeEXT,
vk::DynamicState::eDepthCompareOpEXT,
vk::DynamicState::eDepthTestEnableEXT,
vk::DynamicState::eDepthWriteEnableEXT,
vk::DynamicState::eFrontFaceEXT,
vk::DynamicState::ePrimitiveTopologyEXT,
vk::DynamicState::eStencilOpEXT,
vk::DynamicState::eStencilTestEnableEXT,
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount =
extended_dynamic_states ? static_cast<u32>(dynamic_states.size()) : 6u,
.pDynamicStates = dynamic_states.data()
};
const vk::StencilOpState stencil_op_state = {
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
.depthBoundsTestEnable = false,
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
.front = stencil_op_state,
.back = stencil_op_state
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = layout,
.renderPass = renderpass_cache.GetRenderpass(info.color_attachment,
info.depth_attachment, false)
};
if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
result.result == vk::Result::eSuccess) {
return result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
UNREACHABLE();
}
return VK_NULL_HANDLE;
}
static_assert(sizeof(vk::DescriptorBufferInfo) == sizeof(VkDescriptorBufferInfo));
void PipelineCache::BindDescriptorSets() {
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
if (descriptor_dirty[i] || !descriptor_sets[i]) {
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_set_layouts[i]
};
vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]);
descriptor_sets[i] = set;
descriptor_dirty[i] = false;
}
}
// Bind the descriptor sets
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT,
descriptor_sets.data(), 0, nullptr);
}
void PipelineCache::LoadDiskCache() {
if (!EnsureDirectories()) {
return;
}
const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin";
vk::PipelineCacheCreateInfo cache_info = {
.initialDataSize = 0,
.pInitialData = nullptr
};
FileUtil::IOFile cache_file{cache_file_path, "r"};
if (cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Loading pipeline cache");
const u32 cache_file_size = static_cast<u32>(cache_file.GetSize());
auto cache_data = std::vector<u8>(cache_file_size);
if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
if (!IsCacheValid(cache_data.data(), cache_file_size)) {
LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid");
} else {
cache_info.initialDataSize = cache_file_size;
cache_info.pInitialData = cache_data.data();
}
}
cache_file.Close();
}
vk::Device device = instance.GetDevice();
pipeline_cache = device.createPipelineCache(cache_info);
}
void PipelineCache::SaveDiskCache() {
if (!EnsureDirectories()) {
return;
}
const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin";
FileUtil::IOFile cache_file{cache_file_path, "wb"};
if (!cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
return;
}
vk::Device device = instance.GetDevice();
auto cache_data = device.getPipelineCacheData(pipeline_cache);
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
return;
}
cache_file.Close();
}
bool PipelineCache::IsCacheValid(const u8* data, u32 size) const {
if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
return false;
}
vk::PipelineCacheHeaderVersionOne header;
std::memcpy(&header, data, sizeof(header));
if (header.headerSize < sizeof(header)) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
return false;
}
if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
return false;
}
if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) {
LOG_ERROR(Render_Vulkan,
"Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
header.vendorID, vendor_id);
return false;
}
if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) {
LOG_ERROR(Render_Vulkan,
"Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
header.deviceID, device_id);
return false;
}
if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
return false;
}
return true;
}
bool PipelineCache::EnsureDirectories() const {
const auto CreateDir = [](const std::string& dir) {
if (!FileUtil::CreateDir(dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir);
return false;
}
return true;
};
return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
CreateDir(GetPipelineCacheDir());
}
std::string PipelineCache::GetPipelineCacheDir() const {
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan";
}
} // namespace Vulkan

View File

@ -0,0 +1,268 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "common/bit_field.h"
#include "common/hash.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/shader/shader_cache.h"
#include "video_core/regs.h"
namespace Vulkan {
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
constexpr u32 MAX_VERTEX_BINDINGS = 16;
constexpr u32 MAX_DESCRIPTORS = 8;
constexpr u32 MAX_DESCRIPTOR_SETS = 6;
enum class AttribType : u32 {
Float = 0,
Int = 1,
Short = 2,
Byte = 3,
Ubyte = 4
};
/**
* The pipeline state is tightly packed with bitfields to reduce
* the overhead of hashing as much as possible
*/
union RasterizationState {
u8 value = 0;
BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
struct DepthStencilState {
union {
u32 value = 0;
BitField<0, 1, u32> depth_test_enable;
BitField<1, 1, u32> depth_write_enable;
BitField<2, 1, u32> stencil_test_enable;
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
};
// These are dynamic state so keep them separate
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
};
union BlendingState {
u32 value = 0;
BitField<0, 1, u32> blend_enable;
BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
BitField<23, 4, u32> color_write_mask;
BitField<27, 1, u32> logic_op_enable;
BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op;
};
union VertexBinding {
u16 value = 0;
BitField<0, 4, u16> binding;
BitField<4, 1, u16> fixed;
BitField<5, 11, u16> stride;
};
union VertexAttribute {
u32 value = 0;
BitField<0, 4, u32> binding;
BitField<4, 4, u32> location;
BitField<8, 3, AttribType> type;
BitField<11, 3, u32> size;
BitField<14, 11, u32> offset;
};
struct VertexLayout {
u8 binding_count;
u8 attribute_count;
std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
/**
* Information about a graphics/compute pipeline
*/
struct PipelineInfo {
VertexLayout vertex_layout{};
BlendingState blending{};
VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8;
VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
RasterizationState rasterization{};
DepthStencilState depth_stencil{};
bool IsDepthWriteEnabled() const {
const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8;
const bool depth_write =
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
const bool stencil_write =
has_stencil && depth_stencil.stencil_test_enable && depth_stencil.stencil_write_mask != 0;
return depth_write || stencil_write;
}
};
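// Example, illustrative only: a draw with alpha blending and depth testing would
// populate the packed state like so before calling PipelineCache::BindPipeline.
// The helper and the chosen values are assumptions, not part of this commit.
inline PipelineInfo MakeExamplePipelineInfo() {
    PipelineInfo info{};
    info.blending.blend_enable.Assign(1);
    info.depth_stencil.depth_test_enable.Assign(1);
    info.depth_stencil.depth_write_enable.Assign(1);
    return info;
}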
union DescriptorData {
vk::DescriptorImageInfo image_info;
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
bool operator!=(const DescriptorData& other) const {
return std::memcmp(this, &other, sizeof(DescriptorData)) != 0;
}
};
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;
/**
* Vulkan specialized PICA shader caches
*/
using ProgrammableVertexShaders =
Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, &Compile, &GenerateVertexShader>;
using FixedGeometryShaders =
Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule, &Compile, &GenerateFixedGeometryShader>;
using FragmentShaders =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
class Instance;
class TaskScheduler;
class RenderpassCache;
/**
* Stores a collection of rasterizer pipelines used during rendering.
* In addition, it handles descriptor set management.
*/
class PipelineCache {
public:
PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache);
~PipelineCache();
/// Binds a pipeline using the provided information
void BindPipeline(const PipelineInfo& info);
/// Binds a PICA decompiled vertex shader
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup);
/// Binds a passthrough vertex shader
void UseTrivialVertexShader();
/// Binds a PICA decompiled geometry shader
void UseFixedGeometryShader(const Pica::Regs& regs);
/// Binds a passthrough geometry shader
void UseTrivialGeometryShader();
/// Binds a fragment shader generated from PICA state
void UseFragmentShader(const Pica::Regs& regs);
/// Binds a texture to the specified binding
void BindTexture(u32 binding, vk::ImageView image_view);
/// Binds a storage image to the specified binding
void BindStorageImage(u32 binding, vk::ImageView image_view);
/// Binds a buffer to the specified binding
void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);
/// Binds a texel buffer to the specified binding
void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);
/// Binds a sampler to the specified binding
void BindSampler(u32 binding, vk::Sampler sampler);
/// Sets the viewport rectangle to the provided values
void SetViewport(float x, float y, float width, float height);
/// Sets the scissor rectangle to the provided values
void SetScissor(s32 x, s32 y, u32 width, u32 height);
/// Marks all cached pipeline cache state as dirty
void MarkDirty();
private:
/// Binds a resource to the provided binding
void SetBinding(u32 set, u32 binding, DescriptorData data);
/// Applies dynamic pipeline state to the current command buffer
void ApplyDynamic(const PipelineInfo& info);
/// Builds the rasterizer pipeline layout
void BuildLayout();
/// Builds a rasterizer pipeline using the PipelineInfo struct
vk::Pipeline BuildPipeline(const PipelineInfo& info);
/// Builds descriptor sets that reference the currently bound resources
void BindDescriptorSets();
/// Loads the pipeline cache stored to disk
void LoadDiskCache();
/// Stores the generated pipeline cache to disk
void SaveDiskCache();
/// Returns true when the disk data can be used by the current driver
bool IsCacheValid(const u8* data, u32 size) const;
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
/// Returns the pipeline cache storage dir
std::string GetPipelineCacheDir() const;
private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
// Cached pipelines
vk::PipelineCache pipeline_cache;
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
vk::Pipeline current_pipeline{};
// Cached layouts for the rasterizer pipelines
vk::PipelineLayout layout;
std::array<vk::DescriptorSetLayout, MAX_DESCRIPTOR_SETS> descriptor_set_layouts;
std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;
// Current data for the descriptor sets
std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
u64 timestamp = 0;
// Bound shader modules
enum ProgramType : u32 {
VS = 0,
GS = 2,
FS = 1
};
std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
ProgrammableVertexShaders programmable_vertex_shaders;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
vk::ShaderModule trivial_vertex_shader;
};
} // namespace Vulkan

View File

@ -0,0 +1,131 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Include the Vulkan platform-specific header
#if defined(ANDROID) || defined(__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK
#define VK_USE_PLATFORM_METAL_EXT
#else
#define VK_USE_PLATFORM_WAYLAND_KHR
#define VK_USE_PLATFORM_XLIB_KHR
#endif
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_platform.h"
namespace Vulkan {
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface{};
// Perform instance function loading here, to also load window system functions
VULKAN_HPP_DEFAULT_DISPATCHER.init(instance);
#if defined(VK_USE_PLATFORM_WIN32_KHR)
if (window_info.type == Frontend::WindowSystemType::Windows) {
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
.hinstance = nullptr,
.hwnd = static_cast<HWND>(window_info.render_surface)
};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
UNREACHABLE();
}
}
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci = {
.dpy = static_cast<Display*>(window_info.display_connection),
.window = reinterpret_cast<Window>(window_info.render_surface)
};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
.display = static_cast<wl_display*>(window_info.display_connection),
.surface = static_cast<wl_surface*>(window_info.render_surface)
};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
}
return surface;
}
std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
const auto properties = vk::enumerateInstanceExtensionProperties();
if (properties.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return std::vector<const char*>{};
}
// Add the windowing system specific extension
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#if defined(VK_USE_PLATFORM_WIN32_KHR)
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR)
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
for (const char* extension : extensions) {
const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
return std::strcmp(extension, prop.extensionName) == 0;
});
if (iter == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return std::vector<const char*>{};
}
}
return extensions;
}
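// Illustrative sketch, not part of this commit: the queried extensions feed
// directly into instance creation. The application info values are assumptions.
static vk::Instance CreateExampleInstance(Frontend::WindowSystemType window_type) {
    const auto extensions = GetInstanceExtensions(window_type, false);
    const vk::ApplicationInfo application_info = {
        .pApplicationName = "Citra",
        .apiVersion = VK_API_VERSION_1_1,
    };
    const vk::InstanceCreateInfo instance_ci = {
        .pApplicationInfo = &application_info,
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data(),
    };
    return vk::createInstance(instance_ci);
}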
} // namespace Vulkan

View File

@ -0,0 +1,22 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Frontend {
class EmuWindow;
enum class WindowSystemType : u8;
}
namespace Vulkan {
std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils);
vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window);
} // namespace Vulkan

File diff suppressed because it is too large

View File

@ -0,0 +1,317 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
struct ScreenInfo;
class Instance;
class TaskScheduler;
class RenderpassCache;
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::TextureFilter mip_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color = 0;
u32 lod_min = 0;
u32 lod_max = 0;
s32 lod_bias = 0;
// TODO(wwylele): remove this once mipmap for cube is implemented
bool supress_mipmap_for_cube = false;
auto operator<=>(const SamplerInfo&) const noexcept = default;
};
struct FramebufferInfo {
vk::ImageView color;
vk::ImageView depth;
vk::RenderPass renderpass;
u32 width = 1;
u32 height = 1;
auto operator<=>(const FramebufferInfo&) const noexcept = default;
};
}
namespace std {
template <>
struct hash<Vulkan::SamplerInfo> {
std::size_t operator()(const Vulkan::SamplerInfo& info) const noexcept {
return Common::ComputeHash64(&info, sizeof(Vulkan::SamplerInfo));
}
};
template <>
struct hash<Vulkan::FramebufferInfo> {
std::size_t operator()(const Vulkan::FramebufferInfo& info) const noexcept {
return Common::ComputeHash64(&info, sizeof(Vulkan::FramebufferInfo));
}
};
} // namespace std
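// The byte-wise hashes above assume the structs are trivially copyable and contain
// no indeterminate padding; the first assumption can be made explicit (sketch only):
static_assert(std::is_trivially_copyable_v<Vulkan::SamplerInfo>);
static_assert(std::is_trivially_copyable_v<Vulkan::FramebufferInfo>);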
namespace Vulkan {
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
friend class RendererVulkan;
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler,
TextureRuntime& runtime, RenderpassCache& renderpass_cache);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers
void SyncEntireState() override;
/// Sync fixed function pipeline state
void SyncFixedState();
/// Flushes all rasterizer owned buffers
void FlushBuffers();
private:
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
bool Draw(bool accelerate, bool is_indexed);
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
/// Creates a new sampler object
vk::Sampler CreateSampler(const SamplerInfo& info);
/// Creates a new Vulkan framebuffer object
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info);
private:
const Instance& instance;
TaskScheduler& scheduler;
TextureRuntime& runtime;
RenderpassCache& renderpass_cache;
RasterizerCache res_cache;
PipelineCache pipeline_cache;
bool shader_dirty = true;
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
constexpr static VertexLayout GetVertexLayout();
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
std::vector<HardwareVertex> vertex_batch;
ImageAlloc default_texture;
vk::Sampler default_sampler;
struct {
Pica::Shader::UniformData data{};
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
bool dirty = true;
} uniform_block_data = {};
std::array<bool, 16> hw_enabled_attributes{};
std::array<SamplerInfo, 3> texture_samplers;
SamplerInfo texture_cube_sampler;
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
StreamBuffer vertex_buffer;
StreamBuffer uniform_buffer;
StreamBuffer index_buffer;
StreamBuffer texture_buffer;
StreamBuffer texture_lf_buffer;
PipelineInfo pipeline_info;
std::size_t uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace Vulkan

View File

@ -0,0 +1,196 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
vk::Format ToVkFormatColor(u32 index) {
switch (index) {
case 0: return vk::Format::eR8G8B8A8Unorm;
case 1: return vk::Format::eR8G8B8Unorm;
case 2: return vk::Format::eR5G5B5A1UnormPack16;
case 3: return vk::Format::eR5G6B5UnormPack16;
case 4: return vk::Format::eR4G4B4A4UnormPack16;
default: return vk::Format::eUndefined;
}
}
vk::Format ToVkFormatDepth(u32 index) {
switch (index) {
case 0: return vk::Format::eD16Unorm;
case 1: return vk::Format::eX8D24UnormPack32;
// Notice the similar gap in PixelFormat
case 3: return vk::Format::eD24UnormS8Uint;
default: return vk::Format::eUndefined;
}
}
RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
// Pre-create all renderpasses needed by the renderer
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
const vk::Format color_format =
instance.GetFormatAlternative(ToVkFormatColor(color));
const vk::Format depth_stencil_format =
instance.GetFormatAlternative(ToVkFormatDepth(depth));
if (color_format == vk::Format::eUndefined &&
depth_stencil_format == vk::Format::eUndefined) {
continue;
}
cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format,
vk::AttachmentLoadOp::eLoad,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);
cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format,
vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal);
}
}
}
RenderpassCache::~RenderpassCache() {
vk::Device device = instance.GetDevice();
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) {
if (vk::RenderPass load_pass = cached_renderpasses[color][depth][0]; load_pass) {
device.destroyRenderPass(load_pass);
}
if (vk::RenderPass clear_pass = cached_renderpasses[color][depth][1]; clear_pass) {
device.destroyRenderPass(clear_pass);
}
}
}
device.destroyRenderPass(present_renderpass);
}
void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo& begin_info) {
if (active_renderpass == begin_info.renderPass) {
return;
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (active_renderpass) {
command_buffer.endRenderPass();
}
command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline);
active_renderpass = begin_info.renderPass;
}
void RenderpassCache::ExitRenderpass() {
if (!active_renderpass) {
return;
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.endRenderPass();
active_renderpass = VK_NULL_HANDLE;
}
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
if (!present_renderpass) {
present_renderpass = CreateRenderPass(format, vk::Format::eUndefined,
vk::AttachmentLoadOp::eClear,
vk::ImageLayout::eUndefined,
vk::ImageLayout::ePresentSrcKHR);
}
}
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear) const {
const u32 color_index =
color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast<u32>(color);
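// Depth formats begin at index 14 in VideoCore::PixelFormat, hence the offset below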
const u32 depth_index =
depth == VideoCore::PixelFormat::Invalid ? MAX_DEPTH_FORMATS : (static_cast<u32>(depth) - 14);
ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS);
return cached_renderpasses[color_index][depth_index][is_clear];
}
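// Illustrative usage, not part of this commit: a caller fetches the matching
// renderpass and enters it through the cache, which elides redundant begins.
// The framebuffer, extent and formats here are assumptions for the sketch.
static void BeginExampleRenderpass(RenderpassCache& cache, vk::Framebuffer framebuffer,
                                   vk::Extent2D extent) {
    const vk::RenderPassBeginInfo begin_info = {
        .renderPass = cache.GetRenderpass(VideoCore::PixelFormat::RGBA8,
                                          VideoCore::PixelFormat::D24S8, false),
        .framebuffer = framebuffer,
        .renderArea = vk::Rect2D{vk::Offset2D{0, 0}, extent},
    };
    cache.EnterRenderpass(begin_info);
}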
vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const {
// Define attachments
u32 attachment_count = 0;
std::array<vk::AttachmentDescription, 2> attachments;
bool use_color = false;
vk::AttachmentReference color_attachment_ref{};
bool use_depth = false;
vk::AttachmentReference depth_attachment_ref{};
if (color != vk::Format::eUndefined) {
attachments[attachment_count] = vk::AttachmentDescription{
.format = color,
.loadOp = load_op,
.storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
.stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
.initialLayout = initial_layout,
.finalLayout = final_layout
};
color_attachment_ref = vk::AttachmentReference{
.attachment = attachment_count++,
.layout = vk::ImageLayout::eColorAttachmentOptimal
};
use_color = true;
}
if (depth != vk::Format::eUndefined) {
attachments[attachment_count] = vk::AttachmentDescription{
.format = depth,
.loadOp = load_op,
.storeOp = vk::AttachmentStoreOp::eStore,
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
.stencilStoreOp = vk::AttachmentStoreOp::eStore,
.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal,
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
depth_attachment_ref = vk::AttachmentReference{
.attachment = attachment_count++,
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
use_depth = true;
}
// We also require only one subpass
const vk::SubpassDescription subpass = {
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = use_color ? 1u : 0u,
.pColorAttachments = &color_attachment_ref,
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr
};
const vk::RenderPassCreateInfo renderpass_info = {
.attachmentCount = attachment_count,
.pAttachments = attachments.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 0,
.pDependencies = nullptr
};
// Create the renderpass
vk::Device device = instance.GetDevice();
return device.createRenderPass(renderpass_info);
}
} // namespace Vulkan

View File

@ -0,0 +1,55 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class TaskScheduler;
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 4;
class RenderpassCache {
public:
RenderpassCache(const Instance& instance, TaskScheduler& scheduler);
~RenderpassCache();
/// Begins a new renderpass only when no other renderpass is currently active
void EnterRenderpass(const vk::RenderPassBeginInfo begin_info);
/// Exits from any currently active renderpass instance
void ExitRenderpass();
/// Returns the renderpass associated with the color-depth format pair
[[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear) const;
/// Returns the swapchain clear renderpass
[[nodiscard]] vk::RenderPass GetPresentRenderpass() const {
return present_renderpass;
}
/// Creates the renderpass used when rendering to the swapchain
void CreatePresentRenderpass(vk::Format format);
private:
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op,
vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const;
private:
const Instance& instance;
TaskScheduler& scheduler;
vk::RenderPass active_renderpass = VK_NULL_HANDLE;
vk::RenderPass present_renderpass{};
vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2];
};
} // namespace Vulkan
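
For illustration only (not part of the commit), a minimal sketch of how a rasterizer might drive this cache. The function name, framebuffer, and render area are hypothetical stand-ins, the formats are two of the PixelFormat values handled above, and designated initializers assume VULKAN_HPP_NO_CONSTRUCTORS as in the .cpp files:

// Sketch: pair GetRenderpass() with Enter/ExitRenderpass() around a draw.
void DrawWithCache(Vulkan::RenderpassCache& cache, vk::Framebuffer framebuffer,
                   vk::Rect2D render_area) {
    const vk::RenderPass renderpass = cache.GetRenderpass(
        VideoCore::PixelFormat::RGBA8, VideoCore::PixelFormat::D24S8, /*is_clear=*/false);
    const vk::RenderPassBeginInfo begin_info = {
        .renderPass = renderpass,
        .framebuffer = framebuffer,
        .renderArea = render_area
    };
    cache.EnterRenderpass(begin_info); // only begins when no pass is active
    // ... record draw calls on the scheduler's render command buffer ...
    cache.ExitRenderpass(); // safe to call even when no pass is active
}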

View File

@ -0,0 +1,223 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/Include/ResourceLimits.h>
#include <SPIRV/GlslangToSpv.h>
namespace Vulkan {
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}
};
EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) {
switch (stage) {
case vk::ShaderStageFlagBits::eVertex:
return EShLanguage::EShLangVertex;
case vk::ShaderStageFlagBits::eGeometry:
return EShLanguage::EShLangGeometry;
case vk::ShaderStageFlagBits::eFragment:
return EShLanguage::EShLangFragment;
case vk::ShaderStageFlagBits::eCompute:
return EShLanguage::EShLangCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Unkown shader stage");
UNREACHABLE();
}
return EShLanguage::EShLangVertex;
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
}
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
ShaderOptimization level) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
}
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
EShLanguage lang = ToEshShaderStage(stage);
int default_version = 450;
const char* pass_source_code = code.data();
int pass_source_code_length = static_cast<int>(code.size());
auto shader = std::make_unique<glslang::TShader>(lang);
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
glslang::TShader::ForbidIncluder includer;
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return VK_NULL_HANDLE;
}
// Even though there's only a single shader, we still need to link it to generate SPV
auto program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return VK_NULL_HANDLE;
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (level == ShaderOptimization::Debug) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
} else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = out_code.size() * sizeof(u32),
.pCode = out_code.data()
};
return device.createShaderModule(shader_info);
}
} // namespace Vulkan

View File

@ -0,0 +1,19 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
enum class ShaderOptimization {
High = 0,
Debug = 1
};
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage,
vk::Device device, ShaderOptimization level);
} // namespace Vulkan
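
As a usage sketch (not part of the commit), compiling a trivial GLSL fragment shader into a module with the helper above; the function name is hypothetical and the device is assumed to come from an already-initialized Instance:

// Sketch: compile a constant-color fragment shader to SPIR-V.
vk::ShaderModule MakeRedShader(vk::Device device) {
    constexpr std::string_view code = R"(
#version 450 core
layout(location = 0) out vec4 color;
void main() {
    color = vec4(1.0, 0.0, 0.0, 1.0);
}
)";
    return Compile(code, vk::ShaderStageFlagBits::eFragment, device,
                   Vulkan::ShaderOptimization::Debug);
}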

File diff suppressed because it is too large

View File

@ -0,0 +1,247 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <functional>
#include <optional>
#include <string>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace Vulkan {
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
struct PicaFSConfigState {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
 * programs. The shader generation functions in this backend should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
};
/**
* This struct contains common information to identify a GL vertex/geometry shader generated from
* PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
};
/**
* This struct contains information to identify a GL vertex shader generated from PICA vertex
* shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;
}
};
struct PicaGSConfigCommonRaw {
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
* shader pipeline
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
state.Init(regs);
}
};
/**
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
* and directly passes them to the fragment shader.
* @returns String of the shader source code
*/
std::string GenerateTrivialVertexShader();
/**
* Generates the GLSL vertex shader program source code for the given VS program
 * @returns String of the shader source code; std::nullopt on failure
*/
std::optional<std::string> GenerateVertexShader(
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config);
/**
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
* @returns String of the shader source code
*/
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config);
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaFSConfig& config);
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaVSConfig> {
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaFixedGSConfig> {
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
return k.Hash();
}
};
} // namespace std
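
These hash specializations exist so the configs can key shader caches directly. A hypothetical sketch of such a cache (assuming <unordered_map> is included and that Common::HashableStruct supplies operator==, as it does in common/hash.h):

// Sketch: cache compiled fragment modules by Pica configuration.
std::unordered_map<Vulkan::PicaFSConfig, vk::ShaderModule> fragment_cache;

vk::ShaderModule LookupFragmentShader(const Pica::Regs& regs) {
    const auto config = Vulkan::PicaFSConfig::BuildFromRegs(regs);
    if (auto it = fragment_cache.find(config); it != fragment_cache.end()) {
        return it->second; // reuse the previously compiled module
    }
    // Otherwise run GenerateFragmentShader(config), compile it with the
    // helper from vk_shader.h, and insert the result; omitted for brevity.
    return VK_NULL_HANDLE;
}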

View File

@ -0,0 +1,194 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <algorithm>
#include <tuple>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
vk::PipelineStageFlagBits::eVertexInput);
break;
    case vk::BufferUsageFlagBits::eIndexBuffer:
        result = std::make_pair(vk::AccessFlagBits::eIndexRead,
                                vk::PipelineStageFlagBits::eVertexInput);
        break;
    case vk::BufferUsageFlagBits::eUniformBuffer:
        result = std::make_pair(vk::AccessFlagBits::eUniformRead,
                                vk::PipelineStageFlagBits::eVertexShader |
                                vk::PipelineStageFlagBits::eGeometryShader |
                                vk::PipelineStageFlagBits::eFragmentShader);
        break;
    case vk::BufferUsageFlagBits::eUniformTexelBuffer:
        result = std::make_pair(vk::AccessFlagBits::eShaderRead,
                                vk::PipelineStageFlagBits::eFragmentShader);
        break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
}
return result;
}
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
: instance{instance} {
const vk::BufferCreateInfo buffer_info = {
.size = size,
.usage = usage
};
const VmaAllocationCreateInfo alloc_create_info = {
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
}
StagingBuffer::~StagingBuffer() {
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
: instance{instance}, scheduler{scheduler}, staging{instance, size, vk::BufferUsageFlagBits::eTransferSrc},
usage{usage}, total_size{size} {
const vk::BufferCreateInfo buffer_info = {
.size = total_size,
.usage = usage | vk::BufferUsageFlagBits::eTransferDst
};
const VmaAllocationCreateInfo alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
    ASSERT(view_formats.size() <= MAX_BUFFER_VIEWS);
vk::Device device = instance.GetDevice();
for (std::size_t i = 0; i < view_formats.size(); i++) {
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer,
.format = view_formats[i],
.offset = 0,
.range = total_size
};
views[i] = device.createBufferView(view_info);
}
view_count = view_formats.size();
bucket_size = size / SCHEDULER_COMMAND_COUNT;
}
StreamBuffer::~StreamBuffer() {
if (buffer) {
vk::Device device = instance.GetDevice();
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
for (std::size_t i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
}
}
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
    ASSERT(size <= total_size && alignment <= total_size);
    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
    auto& bucket = buckets[current_bucket];
    // Honor the requested alignment before handing out the mapped pointer
    if (alignment > 0) {
        bucket.offset = Common::AlignUp(bucket.offset, alignment);
    }
    if (bucket.offset + size > bucket_size) {
        UNREACHABLE();
    }
bool invalidate = false;
if (bucket.invalid) {
invalidate = true;
bucket.invalid = false;
}
const u32 buffer_offset = current_bucket * bucket_size + bucket.offset;
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
return std::make_tuple(mapped, buffer_offset, invalidate);
}
void StreamBuffer::Commit(u32 size) {
buckets[scheduler.GetCurrentSlotIndex()].offset += size;
}
void StreamBuffer::Flush() {
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
const u32 flush_size = buckets[current_bucket].offset;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) {
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
VmaAllocator allocator = instance.GetAllocator();
const u32 flush_start = current_bucket * bucket_size;
const vk::BufferCopy copy_region = {
.srcOffset = flush_start,
.dstOffset = flush_start,
.size = flush_size
};
vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
// Add pipeline barrier for the flushed region
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = buffer,
.offset = flush_start,
.size = flush_size
};
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
}
// Reset the offset of the next bucket
const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
buckets[next_bucket].offset = 0;
buckets[next_bucket].invalid = true;
}
u32 StreamBuffer::GetBufferOffset() const {
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
return current_bucket * bucket_size + buckets[current_bucket].offset;
}
} // namespace Vulkan

View File

@ -0,0 +1,91 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include <span>
#include <tuple>
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class TaskScheduler;
constexpr u32 MAX_BUFFER_VIEWS = 3;
struct LockedRegion {
u32 size = 0;
u64 fence_counter = 0;
};
struct StagingBuffer {
StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
~StagingBuffer();
const Instance& instance;
vk::Buffer buffer{};
VmaAllocation allocation{};
std::span<std::byte> mapped{};
};
class StreamBuffer {
public:
StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
~StreamBuffer();
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
/// Commits size bytes from the currently mapped staging memory
void Commit(u32 size = 0);
/// Flushes staging memory to the GPU buffer
void Flush();
/// Returns the current buffer offset
u32 GetBufferOffset() const;
/// Returns the Vulkan buffer handle
vk::Buffer GetHandle() const {
return buffer;
}
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
}
private:
/// Invalidates the buffer offsets
void Invalidate();
/// Removes the lock on regions whose fence counter has been reached by the GPU
bool UnlockFreeRegions(u32 target_size);
private:
struct Bucket {
bool invalid;
u32 fence_counter;
u32 offset;
};
const Instance& instance;
TaskScheduler& scheduler;
StagingBuffer staging;
vk::Buffer buffer{};
VmaAllocation allocation{};
vk::BufferUsageFlagBits usage;
u32 total_size = 0;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
std::size_t view_count = 0;
u32 bucket_size = 0;
std::array<Bucket, SCHEDULER_COMMAND_COUNT> buckets{};
};
} // namespace Vulkan
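
A short usage sketch (not part of the commit) of the Map/Commit/Flush cycle for streaming vertex data. The function name is hypothetical, <cstring> is assumed included, and the buffer is assumed to have been constructed with eVertexBuffer usage:

// Sketch: stream a span of vertex data and obtain its offset for binding.
u32 UploadVertices(Vulkan::StreamBuffer& vertex_buffer, std::span<const float> vertices) {
    const u32 size = static_cast<u32>(vertices.size_bytes());
    // invalidate signals that pointers returned earlier for this slot are stale
    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, sizeof(float));
    std::memcpy(ptr, vertices.data(), size);
    vertex_buffer.Commit(size); // advance this slot's bucket by what we wrote
    vertex_buffer.Flush();      // staging -> device-local copy plus barrier
    return offset;              // usable with vkCmdBindVertexBuffers
}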

View File

@ -0,0 +1,232 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <algorithm>
#include <utility>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache)
: instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} {
// Set the surface format early for RenderpassCache to create the present renderpass
Configure(0, 0);
renderpass_cache.CreatePresentRenderpass(surface_format.format);
}
Swapchain::~Swapchain() {
vk::Device device = instance.GetDevice();
device.destroySwapchainKHR(swapchain);
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}
}
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
is_outdated = false;
is_suboptimal = false;
// Fetch information about the provided surface
Configure(width, height);
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
instance.GetPresentQueueFamilyIndex(),
};
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 1u : 2u;
const vk::SharingMode sharing_mode =
exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
const vk::SwapchainCreateInfoKHR swapchain_info = {
.surface = surface,
.minImageCount = image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
.imageSharingMode = sharing_mode,
.queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.presentMode = present_mode,
.clipped = true,
.oldSwapchain = swapchain
};
vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
// If an old swapchain exists, destroy it and move the new one to its place.
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
device.destroySwapchainKHR(old_swapchain);
}
vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
auto images = device.getSwapchainImagesKHR(swapchain);
// Destroy the previous images
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}
swapchain_images.clear();
swapchain_images.resize(images.size());
std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image {
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = vk::ImageViewType::e2D,
.format = surface_format.format,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
}
};
vk::ImageView image_view = device.createImageView(view_info);
const std::array attachments{image_view};
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = present_renderpass,
.attachmentCount = 1,
.pAttachments = attachments.data(),
.width = extent.width,
.height = extent.height,
.layers = 1
};
vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info);
return Image{
.image = image,
.image_view = image_view,
.framebuffer = framebuffer
};
});
}
// Wait for maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
VK_NULL_HANDLE, &current_image);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
is_suboptimal = true;
break;
case vk::Result::eErrorOutOfDateKHR:
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
break;
}
}
void Swapchain::Present(vk::Semaphore wait_for_present) {
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &wait_for_present,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image
};
vk::Queue present_queue = instance.GetPresentQueue();
vk::Result result = present_queue.presentKHR(present_info);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
break;
case vk::Result::eErrorOutOfDateKHR:
is_outdated = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
break;
}
current_frame = (current_frame + 1) % swapchain_images.size();
}
void Swapchain::Configure(u32 width, u32 height) {
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
// Choose surface format
auto formats = physical.getSurfaceFormatsKHR(surface);
surface_format = formats[0];
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
surface_format.format = vk::Format::eB8G8R8A8Unorm;
} else {
auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool {
return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm;
});
if (it == formats.end()) {
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
} else {
surface_format = *it;
}
}
    // Choose the present mode: FIFO is guaranteed by the Vulkan standard to be
    // available, but prefer Mailbox when supported for lower latency.
    auto modes = physical.getSurfacePresentModesKHR(surface);
    present_mode = vk::PresentModeKHR::eFifo;
    auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) {
        return vk::PresentModeKHR::eMailbox == mode;
    });
    if (iter != modes.end()) {
        present_mode = vk::PresentModeKHR::eMailbox;
    }
// Query surface extent
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
extent.width = std::clamp(width, capabilities.minImageExtent.width,
capabilities.maxImageExtent.width);
extent.height = std::clamp(height, capabilities.minImageExtent.height,
capabilities.maxImageExtent.height);
}
// Select number of images in swap chain, we prefer one buffer in the background to work on
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
}
} // namespace Vulkan

View File

@ -0,0 +1,91 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class RenderpassCache;
class Swapchain {
public:
Swapchain(const Instance& instance, RenderpassCache& renderpass_cache);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquires the next image in the swapchain.
void AcquireNextImage(vk::Semaphore signal_acquired);
/// Presents the current image and move to the next one
void Present(vk::Semaphore wait_for_present);
/// Returns current swapchain state
vk::Extent2D GetExtent() const {
return extent;
}
/// Returns the swapchain surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
    /// Returns the current framebuffer
vk::Framebuffer GetFramebuffer() const {
return swapchain_images[current_image].framebuffer;
}
/// Returns the swapchain format
vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
/// Returns the Vulkan swapchain handle
vk::SwapchainKHR GetHandle() const {
return swapchain;
}
/// Returns true when the swapchain should be recreated
bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
}
private:
void Configure(u32 width, u32 height);
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
vk::SwapchainKHR swapchain{};
vk::SurfaceKHR surface{};
// Swapchain properties
vk::SurfaceFormatKHR surface_format;
vk::PresentModeKHR present_mode;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
u32 image_count;
struct Image {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
};
// Swapchain state
std::vector<Image> swapchain_images;
u32 current_image = 0;
u32 current_frame = 0;
bool vsync_enabled = false;
bool is_outdated = true;
bool is_suboptimal = true;
};
} // namespace Vulkan
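
A sketch (not part of the commit) of the per-frame flow this interface implies. The function name and parameters are stand-ins; the semaphores would come from the TaskScheduler slots and the size from the frontend:

// Sketch: recreate on demand, acquire, render, present.
void PresentFrame(Vulkan::Swapchain& swapchain, vk::Semaphore image_acquired,
                  vk::Semaphore render_done, u32 width, u32 height) {
    if (swapchain.NeedsRecreation()) {
        swapchain.Create(width, height, /*vsync_enabled=*/true);
    }
    swapchain.AcquireNextImage(image_acquired);
    // ... render into swapchain.GetFramebuffer(), then submit work that
    // signals render_done ...
    swapchain.Present(render_done);
}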

View File

@ -0,0 +1,252 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo command_pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
};
command_pool = device.createCommandPool(command_pool_info);
// If supported, prefer timeline semaphores over binary ones
if (instance.IsTimelineSemaphoreSupported()) {
const vk::StructureChain timeline_info = {
vk::SemaphoreCreateInfo{},
vk::SemaphoreTypeCreateInfo{
.semaphoreType = vk::SemaphoreType::eTimeline,
.initialValue = 0
}
};
timeline = device.createSemaphore(timeline_info.get());
}
constexpr std::array pool_sizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, 512},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024}
};
const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
.maxSets = 2048,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data()
};
const vk::CommandBufferAllocateInfo buffer_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
};
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = ExecutionSlot{
.image_acquired = device.createSemaphore({}),
.present_ready = device.createSemaphore({}),
.fence = device.createFence({}),
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
.render_command_buffer = command_buffers[2 * i],
.upload_command_buffer = command_buffers[2 * i + 1],
};
}
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Begin first command
auto& command = commands[current_command];
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
}
TaskScheduler::~TaskScheduler() {
vk::Device device = instance.GetDevice();
device.waitIdle();
if (timeline) {
device.destroySemaphore(timeline);
}
for (const auto& command : commands) {
device.destroyFence(command.fence);
device.destroySemaphore(command.image_acquired);
device.destroySemaphore(command.present_ready);
device.destroyDescriptorPool(command.descriptor_pool);
}
device.destroyCommandPool(command_pool);
}
void TaskScheduler::Synchronize(u32 slot) {
const auto& command = commands[slot];
vk::Device device = instance.GetDevice();
    u64 completed_counter = completed_fence_counter;
if (instance.IsTimelineSemaphoreSupported()) {
completed_counter = device.getSemaphoreCounterValue(timeline);
}
if (command.fence_counter > completed_counter) {
if (instance.IsTimelineSemaphoreSupported()) {
const vk::SemaphoreWaitInfo wait_info = {
.semaphoreCount = 1,
.pSemaphores = &timeline,
.pValues = &command.fence_counter
};
if (device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter);
UNREACHABLE();
}
} else if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter);
UNREACHABLE();
}
}
completed_fence_counter = command.fence_counter;
device.resetFences(command.fence);
device.resetDescriptorPool(command.descriptor_pool);
}
void TaskScheduler::Submit(SubmitMode mode) {
const auto& command = commands[current_command];
command.render_command_buffer.end();
if (command.use_upload_buffer) {
command.upload_command_buffer.end();
}
u32 command_buffer_count = 0;
std::array<vk::CommandBuffer, 2> command_buffers;
if (command.use_upload_buffer) {
command_buffers[command_buffer_count++] = command.upload_command_buffer;
}
command_buffers[command_buffer_count++] = command.render_command_buffer;
const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
if (instance.IsTimelineSemaphoreSupported()) {
const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
        const std::array wait_values{command.fence_counter - 1, u64{1}};
const std::array wait_semaphores{timeline, command.image_acquired};
const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
        const std::array signal_values{command.fence_counter, u64{0}};
const std::array signal_semaphores{timeline, command.present_ready};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = wait_semaphore_count,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = signal_semaphore_count,
.pSignalSemaphoreValues = signal_values.data()
};
const std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = signal_semaphores.data(),
};
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info);
} else {
const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
const vk::PipelineStageFlags wait_stage_masks =
vk::PipelineStageFlagBits::eColorAttachmentOutput;
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &command.image_acquired,
.pWaitDstStageMask = &wait_stage_masks,
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &command.present_ready,
};
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info, command.fence);
}
// Block host until the GPU catches up
if (True(mode & SubmitMode::Flush)) {
Synchronize(current_command);
}
// Switch to next cmdbuffer.
if (False(mode & SubmitMode::Shutdown)) {
SwitchSlot();
}
}
u64 TaskScheduler::GetFenceCounter() const {
vk::Device device = instance.GetDevice();
if (instance.IsTimelineSemaphoreSupported()) {
return device.getSemaphoreCounterValue(timeline);
}
return completed_fence_counter;
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& command = commands[current_command];
if (!command.use_upload_buffer) {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
command.upload_command_buffer.begin(begin_info);
command.use_upload_buffer = true;
}
return command.upload_command_buffer;
}
void TaskScheduler::SwitchSlot() {
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
auto& command = commands[current_command];
// Wait for the GPU to finish with all resources for this command.
Synchronize(current_command);
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Begin the next command buffer.
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
command.use_upload_buffer = false;
}
} // namespace Vulkan
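
Worth noting: because GetUploadCommandBuffer() begins the upload buffer lazily and Submit() places it before the render buffer in the submission, recording an early transfer is a one-liner. A hypothetical sketch (handles and function name are stand-ins):

// Sketch: record a copy that executes before this slot's rendering work.
void RecordUpload(Vulkan::TaskScheduler& scheduler, vk::Buffer src, vk::Buffer dst, u32 size) {
    vk::CommandBuffer upload = scheduler.GetUploadCommandBuffer(); // begins on first use
    const vk::BufferCopy region = {.srcOffset = 0, .dstOffset = 0, .size = size};
    upload.copyBuffer(src, dst, region);
}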

View File

@ -0,0 +1,97 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <array>
#include <functional>
#include "common/common_types.h"
#include "common/common_funcs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Buffer;
class Instance;
enum class SubmitMode : u8 {
SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads
Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer
};
DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
class TaskScheduler {
public:
TaskScheduler(const Instance& instance);
~TaskScheduler();
/// Blocks the host until the current command completes execution
void Synchronize(u32 slot);
/// Submits the current command to the graphics queue
void Submit(SubmitMode mode);
/// Returns the last completed fence counter
u64 GetFenceCounter() const;
/// Returns the command buffer used for early upload operations.
vk::CommandBuffer GetUploadCommandBuffer();
/// Returns the command buffer used for rendering
vk::CommandBuffer GetRenderCommandBuffer() const {
return commands[current_command].render_command_buffer;
}
/// Returns the current descriptor pool
vk::DescriptorPool GetDescriptorPool() const {
return commands[current_command].descriptor_pool;
}
/// Returns the index of the current command slot
u32 GetCurrentSlotIndex() const {
return current_command;
}
u64 GetHostFenceCounter() const {
return next_fence_counter - 1;
}
vk::Semaphore GetImageAcquiredSemaphore() const {
return commands[current_command].image_acquired;
}
vk::Semaphore GetPresentReadySemaphore() const {
return commands[current_command].present_ready;
}
private:
/// Activates the next command slot and optionally waits for its completion
void SwitchSlot();
private:
const Instance& instance;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;
struct ExecutionSlot {
bool use_upload_buffer = false;
u64 fence_counter = 0;
vk::Semaphore image_acquired;
vk::Semaphore present_ready;
vk::Fence fence;
vk::DescriptorPool descriptor_pool;
vk::CommandBuffer render_command_buffer;
vk::CommandBuffer upload_command_buffer;
};
vk::CommandPool command_pool{};
vk::Semaphore timeline{};
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
u32 current_command = 0;
};
} // namespace Vulkan
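
As a sketch of a frame boundary (not part of the commit, assuming vk_swapchain.h is included): the present-ready semaphore must be captured before Submit(), since a non-shutdown submission switches to the next slot and the getter would then return the new slot's semaphore:

// Sketch: submit the current slot synced to the swapchain, then present.
void EndFrame(Vulkan::TaskScheduler& scheduler, Vulkan::Swapchain& swapchain) {
    swapchain.AcquireNextImage(scheduler.GetImageAcquiredSemaphore());
    // ... record rendering into scheduler.GetRenderCommandBuffer() ...
    const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
    scheduler.Submit(Vulkan::SubmitMode::SwapchainSynced); // also switches slots
    swapchain.Present(present_ready); // wait on the slot we just submitted
}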

View File

@ -0,0 +1,705 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
switch (format) {
case VideoCore::PixelFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case VideoCore::PixelFormat::RGB8:
return vk::Format::eR8G8B8Unorm;
case VideoCore::PixelFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case VideoCore::PixelFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case VideoCore::PixelFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case VideoCore::PixelFormat::D16:
return vk::Format::eD16Unorm;
case VideoCore::PixelFormat::D24:
return vk::Format::eX8D24UnormPack32;
case VideoCore::PixelFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
case VideoCore::PixelFormat::Invalid:
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
default:
        // All remaining texture formats are converted to RGBA8 during upload
return vk::Format::eR8G8B8A8Unorm;
}
}
vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture:
case VideoCore::SurfaceType::Fill:
return vk::ImageAspectFlagBits::eColor;
case VideoCore::SurfaceType::Depth:
return vk::ImageAspectFlagBits::eDepth;
case VideoCore::SurfaceType::DepthStencil:
return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil;
default:
UNREACHABLE_MSG("Invalid surface type!");
}
return vk::ImageAspectFlagBits::eColor;
}
vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture:
case VideoCore::SurfaceType::Fill:
return vk::FormatFeatureFlagBits::eColorAttachment;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return vk::FormatFeatureFlagBits::eDepthStencilAttachment;
default:
UNREACHABLE_MSG("Invalid surface type!");
}
return vk::FormatFeatureFlagBits::eColorAttachment;
}
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} {
for (auto& buffer : staging_buffers) {
buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst);
}
}
TextureRuntime::~TextureRuntime() {
VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice();
device.waitIdle();
for (const auto& [key, alloc] : texture_recycler) {
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
device.destroyImageView(alloc.image_view);
}
for (const auto& [key, framebuffer] : clear_framebuffers) {
device.destroyFramebuffer(framebuffer);
}
texture_recycler.clear();
}
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const u32 current_slot = scheduler.GetCurrentSlotIndex();
const u32 offset = staging_offsets[current_slot];
if (offset + size > STAGING_BUFFER_SIZE) {
LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
UNREACHABLE();
}
const auto& buffer = staging_buffers[current_slot];
return StagingData{
.buffer = buffer->buffer,
.size = size,
.mapped = buffer->mapped.subspan(offset, size),
.buffer_offset = offset
};
}
void TextureRuntime::OnSlotSwitch(u32 new_slot) {
staging_offsets[new_slot] = 0;
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const VideoCore::HostTextureTag key = {
.format = format,
.width = width,
.height = height,
.layers = layers
};
// Attempt to recycle an unused allocation
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
ImageAlloc alloc = std::move(it->second);
texture_recycler.erase(it);
return alloc;
}
// Create a new allocation
vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format));
vk::ImageAspectFlags aspect = GetImageAspect(vk_format);
const u32 levels = std::bit_width(std::max(width, height));
const vk::ImageCreateInfo image_info = {
.flags = type == VideoCore::TextureType::CubeMap ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = vk::ImageType::e2D,
.format = vk_format,
.extent = {width, height, 1},
.mipLevels = levels,
.arrayLayers = layers,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
};
const VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocation allocation;
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &allocation, nullptr);
if (result != VK_SUCCESS) {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
UNREACHABLE();
}
vk::Image image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = type == VideoCore::TextureType::CubeMap ?
vk::ImageViewType::eCube :
vk::ImageViewType::e2D,
.format = vk_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = layers
}
};
vk::Device device = instance.GetDevice();
vk::ImageView image_view = device.createImageView(view_info);
return ImageAlloc{
.image = image,
.image_view = image_view,
.allocation = allocation,
.aspect = aspect,
.levels = levels,
};
}
void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) {
texture_recycler.emplace(tag, std::move(alloc));
}
void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest) {
const VideoCore::SurfaceType type = VideoCore::GetFormatType(format);
const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type);
if (format == VideoCore::PixelFormat::RGBA8) {
return Pica::Texture::ConvertABGRToRGBA(source, dest);
} else if (format == VideoCore::PixelFormat::RGB8 && upload) {
return Pica::Texture::ConvertBGRToRGBA(source, dest);
} else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
std::memcpy(dest.data(), source.data(), source.size());
} else {
LOG_CRITICAL(Render_Vulkan, "Unimplemented converion for format {}!", format);
std::memcpy(dest.data(), source.data(), source.size());
}
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value) {
const vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal,
0, surface.alloc.levels, 0, surface.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
vk::ClearValue clear_value{};
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_value.color = vk::ClearColorValue{
.float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]})
};
} else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
clear_value.depthStencil = vk::ClearDepthStencilValue{
.depth = value.depth,
.stencil = value.stencil
};
}
// For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
if (clear.texture_rect == surface.GetScaledRect()) {
const vk::ImageSubresourceRange range = {
.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
};
if (aspect & vk::ImageAspectFlagBits::eColor) {
command_buffer.clearColorImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
clear_value.color, range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
command_buffer.clearDepthStencilImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
clear_value.depthStencil, range);
}
} else {
// For partial clears we begin a clear renderpass with the appropriate render area
vk::RenderPass clear_renderpass{};
ImageAlloc& alloc = surface.alloc;
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_renderpass = renderpass_cache.GetRenderpass(surface.pixel_format,
VideoCore::PixelFormat::Invalid, true);
Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0, alloc.levels);
} else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) {
clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true);
Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, alloc.levels);
}
auto [it, new_framebuffer] = clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{});
if (new_framebuffer) {
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = clear_renderpass,
.attachmentCount = 1,
.pAttachments = &alloc.image_view,
.width = surface.GetScaledWidth(),
.height = surface.GetScaledHeight(),
.layers = 1
};
vk::Device device = instance.GetDevice();
it->second = device.createFramebuffer(framebuffer_info);
}
const vk::RenderPassBeginInfo clear_begin_info = {
.renderPass = clear_renderpass,
.framebuffer = it->second,
.renderArea = vk::Rect2D{
.offset = {static_cast<s32>(clear.texture_rect.left), static_cast<s32>(clear.texture_rect.bottom)},
.extent = {clear.texture_rect.GetWidth(), clear.texture_rect.GetHeight()}
},
.clearValueCount = 1,
.pClearValues = &clear_value
};
renderpass_cache.EnterRenderpass(clear_begin_info);
renderpass_cache.ExitRenderpass();
}
return true;
}
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
renderpass_cache.ExitRenderpass();
const vk::ImageCopy image_copy = {
.srcSubresource = {
.aspectMask = ToVkAspect(source.type),
.mipLevel = copy.src_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
.dstSubresource = {
.aspectMask = ToVkAspect(dest.type),
.mipLevel = copy.dst_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
.extent = {copy.extent.width, copy.extent.height, 1}
};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, source.alloc.levels);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0, dest.alloc.levels);
command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
return true;
}
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) {
renderpass_cache.ExitRenderpass();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal,
0, source.alloc.levels, 0, source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal,
0, dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
const std::array source_offsets = {
vk::Offset3D{static_cast<s32>(blit.src_rect.left), static_cast<s32>(blit.src_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.src_rect.right), static_cast<s32>(blit.src_rect.top), 1}
};
const std::array dest_offsets = {
vk::Offset3D{static_cast<s32>(blit.dst_rect.left), static_cast<s32>(blit.dst_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.dst_rect.right), static_cast<s32>(blit.dst_rect.top), 1}
};
const vk::ImageBlit blit_area = {
.srcSubresource = {
.aspectMask = ToVkAspect(source.type),
.mipLevel = blit.src_level,
.baseArrayLayer = blit.src_layer,
.layerCount = 1
},
.srcOffsets = source_offsets,
.dstSubresource = {
.aspectMask = ToVkAspect(dest.type),
.mipLevel = blit.dst_level,
.baseArrayLayer = blit.dst_layer,
.layerCount = 1
},
.dstOffsets = dest_offsets
};
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
return true;
}
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
renderpass_cache.ExitRenderpass();
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
s32 current_height = surface.GetScaledHeight();
const u32 levels = std::bit_width(std::max(surface.width, surface.height));
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {
vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}
};
const std::array dest_offsets = {
vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
current_height > 1 ? current_height / 2 : 1, 1}
};
const vk::ImageBlit blit_area = {
.srcSubresource = {
.aspectMask = aspect,
.mipLevel = i - 1,
.baseArrayLayer = 0,
.layerCount = 1
},
.srcOffsets = source_offsets,
.dstSubresource = {
.aspectMask = aspect,
.mipLevel = i,
.baseArrayLayer = 0,
.layerCount = 1
},
.dstOffsets = dest_offsets
};
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
// Halve the dimensions so the next iteration blits from the level just written
current_width = std::max(current_width / 2, 1);
current_height = std::max(current_height / 2, 1);
}
}
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count,
u32 layer, u32 layer_count) {
if (new_layout == alloc.layout || !alloc.image) {
return;
}
struct LayoutInfo {
vk::AccessFlags access;
vk::PipelineStageFlags stage;
};
// Get optimal transition settings for every image layout. Settings taken from Dolphin
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info;
switch (layout) {
case vk::ImageLayout::eUndefined:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
break;
case vk::ImageLayout::ePreinitialized:
// Image has been pre-initialized by the host, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eHostWrite;
info.stage = vk::PipelineStageFlagBits::eHost;
break;
case vk::ImageLayout::eColorAttachmentOptimal:
// Image was being used as a color attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
break;
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
// Image was being used as a depth-stencil attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
break;
case vk::ImageLayout::ePresentSrcKHR:
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
break;
case vk::ImageLayout::eShaderReadOnlyOptimal:
// Image was being used as a shader resource, make sure all reads have finished.
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
break;
case vk::ImageLayout::eTransferSrcOptimal:
// Image was being used as a copy source, ensure all reads have finished.
info.access = vk::AccessFlagBits::eTransferRead;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eTransferDstOptimal:
// Image was being used as a copy destination, ensure all writes have finished.
info.access = vk::AccessFlagBits::eTransferWrite;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eGeneral:
info.access = vk::AccessFlagBits::eInputAttachmentRead;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eFragmentShader;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled Vulkan image layout {}", static_cast<u32>(layout));
UNREACHABLE();
}
return info;
};
LayoutInfo source = GetLayoutInfo(alloc.layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = alloc.layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {
.aspectMask = alloc.aspect,
// HACK: Transition the entire mip chain; alloc.layout tracks a single layout
// per image, so honoring level/level_count here would desynchronize it.
.baseMipLevel = /*level*/0,
.levelCount = /*level_count*/alloc.levels,
.baseArrayLayer = layer,
.layerCount = layer_count
}
};
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
alloc.layout = new_layout;
}
Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime)
: VideoCore::SurfaceBase<Surface>{params}, runtime{runtime}, instance{runtime.GetInstance()},
scheduler{runtime.GetScheduler()} {
if (pixel_format != VideoCore::PixelFormat::Invalid) {
alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type);
}
}
Surface::~Surface() {
if (pixel_format != VideoCore::PixelFormat::Invalid) {
const VideoCore::HostTextureTag tag = {
.format = pixel_format,
.width = GetScaledWidth(),
.height = GetScaledHeight(),
.layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u
};
runtime.Recycle(tag, std::move(alloc));
}
}
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload);
runtime.renderpass_cache.ExitRenderpass();
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledUpload(upload);
} else {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = upload.texture_rect;
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
.aspectMask = alloc.aspect,
.mipLevel = upload.texture_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
};
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, alloc.levels,
0, texture_type == VideoCore::TextureType::CubeMap ? 6 : 1);
command_buffer.copyBufferToImage(staging.buffer, alloc.image,
vk::ImageLayout::eTransferDstOptimal,
copy_region);
}
InvalidateAllWatcher();
// Lock this data until the next scheduler switch
const u32 current_slot = scheduler.GetCurrentSlotIndex();
runtime.staging_offsets[current_slot] += staging.size;
}
MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Download);
runtime.renderpass_cache.ExitRenderpass();
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledDownload(download);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const VideoCore::Rect2D rect = download.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {
.aspectMask = alloc.aspect,
.mipLevel = download.texture_level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}
};
if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
return; // HACK: Skip depth + stencil downloads for now
copy_region.bufferOffset += staging.mapped.size();
copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
// Copy pixel data to the staging buffer
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data());
scheduler.Submit(SubmitMode::Flush);
}
// Lock this data until the next scheduler switch
const u32 current_slot = scheduler.GetCurrentSlotIndex();
runtime.staging_offsets[current_slot] += staging.size;
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
/*const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
// Allocate an unscaled texture that fits the download rectangle to use as a blit destination
const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture);
// Blit the scaled rectangle to the unscaled texture
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top,
0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
rect_height, rect_width,
reinterpret_cast<void*>(download.buffer_offset));
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(download.buffer_offset));
}*/
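// NOTE: Unimplemented for Vulkan; the OpenGL path above is kept for reference.
// A port would plausibly allocate an unscaled temporary surface, BlitTextures()
// the scaled rect into it, then copyImageToBuffer into staging (an assumption,
// not part of this commit).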
}
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) {
/*const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format,
VideoCore::TextureType::Texture2D);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height,
tuple.format, tuple.type, reinterpret_cast<void*>(upload.buffer_offset));
const auto scaled_rect = upload.texture_rect * res_scale;
const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0};
const auto& filterer = runtime.GetFilterer();
if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex);
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture);
// If filtering fails, resort to normal blitting
glBlitFramebuffer(0, 0, rect_width, rect_height,
upload.texture_rect.left, upload.texture_rect.bottom,
upload.texture_rect.right, upload.texture_rect.top,
MakeBufferMask(type), GL_LINEAR);
}*/
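// NOTE: Unimplemented for Vulkan; the OpenGL path above is kept for reference.
// A port would plausibly invert ScaledDownload: copy staging into an unscaled
// temporary, then BlitTextures() (or a filter pass) into the scaled surface
// (an assumption, not part of this commit).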
}
} // namespace Vulkan

View File

@ -0,0 +1,143 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <set>
#include <vulkan/vulkan_hash.hpp>
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
struct StagingData {
vk::Buffer buffer;
u32 size = 0;
std::span<std::byte> mapped{};
u32 buffer_offset = 0;
};
struct ImageAlloc {
vk::Image image;
vk::ImageView image_view;
VmaAllocation allocation;
vk::ImageLayout layout = vk::ImageLayout::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
u32 levels = 1;
};
class Instance;
class RenderpassCache;
class Surface;
/**
* Provides texture manipulation functions to the rasterizer cache
* Separating this into a class makes it easier to abstract graphics API code
*/
class TextureRuntime {
friend class Surface;
public:
TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache);
~TextureRuntime();
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
[[nodiscard]] StagingData FindStaging(u32 size, bool upload);
/// Allocates a Vulkan image, possibly reusing an existing one from the recycler
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
/// Takes back ownership of the allocation for recycling
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);
/// Performs required format conversions on the staging data
void FormatConvert(VideoCore::PixelFormat format, bool upload,
std::span<std::byte> source, std::span<std::byte> dest);
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count,
u32 layer = 0, u32 layer_count = 1);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
VideoCore::ClearValue value);
/// Copies a rectangle of pixels from the source surface to the destination surface
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
/// Blits a rectangle of pixels from the source surface to the destination surface
bool BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
/// Performs operations that need to be done on every scheduler slot switch
void OnSlotSwitch(u32 new_slot);
private:
/// Returns the current Vulkan instance
const Instance& GetInstance() const {
return instance;
}
/// Returns the current Vulkan scheduler
TaskScheduler& GetScheduler() const {
return scheduler;
}
private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
};
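// Illustrative use of the staging API (a sketch under assumptions, not part of
// this commit): an unscaled upload pairs FindStaging() with Surface::Upload().
// `size`, `pixels` and `upload` are hypothetical; only the runtime calls are real.
//
// StagingData staging = runtime.FindStaging(size, true);
// std::memcpy(staging.mapped.data(), pixels, size);
// surface.Upload(upload, staging); // upload is a VideoCore::BufferTextureCopy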
class Surface : public VideoCore::SurfaceBase<Surface> {
friend class TextureRuntime;
friend class RasterizerVulkan;
public:
Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime);
~Surface() override;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
private:
/// Downloads a scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
/// Uploads pixel data to a scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload);
/// Overrides the image layout of the mip level range
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
ImageAlloc alloc{};
vk::Format internal_format = vk::Format::eUndefined;
};
struct Traits {
using RuntimeType = TextureRuntime;
using SurfaceType = Surface;
};
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
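// Traits is the glue for the templated cache: VideoCore::RasterizerCache<Traits>
// resolves its runtime and surface types to the Vulkan classes above, mirroring
// how the OpenGL backend plugs in its own TextureRuntime/Surface pair.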
} // namespace Vulkan

View File

@ -0,0 +1,97 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <unordered_map>
#include <tuple>
#include "video_core/shader/shader.h"
namespace Pica::Shader {
template <typename ShaderType>
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
std::string(*CodeGenerator)(const KeyType&)>
class ShaderCache {
public:
ShaderCache() = default;
~ShaderCache() = default;
/// Returns a shader handle generated from the provided config
template <typename... Args>
auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult<ShaderType> {
auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
std::string code = CodeGenerator(config);
shader = ModuleCompiler(code, args...);
return std::make_pair(shader, code);
}
return std::make_pair(shader, std::nullopt);
}
void Inject(const KeyType& key, ShaderType&& shader) {
shaders.emplace(key, std::move(shader));
}
public:
std::unordered_map<KeyType, ShaderType> shaders;
};
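// Illustrative instantiation (hypothetical Compile/Generate helpers and FSConfig
// key type; not part of this commit). The compiler receives the generated code
// plus any extra arguments forwarded through Get():
//
// vk::ShaderModule Compile(std::string code, vk::Device device);
// std::string Generate(const FSConfig& config);
// using FragmentShaders = ShaderCache<FSConfig, vk::ShaderModule, &Compile, &Generate>;
//
// FragmentShaders cache;
// auto [module, code] = cache.Get(config, device); // code holds the source on a miss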
/**
* This is a cache designed for shaders translated from PICA shaders. The first cache matches the
* config structure like a normal cache does. On cache miss, the second cache matches the generated
* GLSL code. The cache is layered this way because there might be leftover code in the PICA shader
* program buffer from the previous shader, which is hashed into the config, resulting in several
* different config values for the same shader program.
*/
template <typename KeyType, typename ShaderType, auto ModuleCompiler,
std::optional<std::string>(*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)>
class ShaderDoubleCache {
public:
ShaderDoubleCache() = default;
~ShaderDoubleCache() = default;
template <typename... Args>
auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) -> ShaderCacheResult<ShaderType> {
if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
auto code = CodeGenerator(setup, key);
if (!code) {
shader_map[key] = nullptr;
return std::make_pair(ShaderType{}, std::nullopt);
}
std::string& program = code.value();
auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
shader = ModuleCompiler(program, args...);
}
shader_map[key] = &shader;
return std::make_pair(shader, std::move(program));
} else {
// Guard against entries cached as nullptr when code generation failed
if (!map_iter->second) {
return std::make_pair(ShaderType{}, std::nullopt);
}
return std::make_pair(*map_iter->second, std::nullopt);
}
}
void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
auto& cached_shader = iter->second;
shader_map.insert_or_assign(key, &cached_shader);
}
public:
std::unordered_map<KeyType, ShaderType*> shader_map;
std::unordered_map<std::string, ShaderType> shader_cache;
};
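// The second map is what keeps compilation single-shot: two keys that hash
// differently but generate identical source both resolve to one compiled module.
// Illustrative call (hypothetical names, mirroring the sketch above):
//
// auto [module, code] = double_cache.Get(key, setup, device);
// if (code) { /* freshly generated source, e.g. for a disk shader cache */ }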
} // namespace Pica::Shader

View File

@ -0,0 +1,25 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Pica::Shader {
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup) {
std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools),
[](bool value) -> BoolAligned { return {value ? 1 : 0}; });
std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i),
[](const auto& value) -> Common::Vec4u {
return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()};
});
std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f),
[](const auto& value) -> Common::Vec4f {
return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(),
value.w.ToFloat32()};
});
}
} // namespace Pica::Shader

View File

@ -0,0 +1,99 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
namespace Pica {
struct ShaderRegs;
}
namespace Pica::Shader {
struct ShaderSetup;
enum class UniformBindings : u32 { Common, VS, GS };
struct LightSrc {
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/**
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
* NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
* Not following that rule will cause problems on some AMD drivers.
*/
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
bool enable_clip1;
alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4];
alignas(16) Common::Vec3f fog_color;
alignas(8) Common::Vec2f proctex_noise_f;
alignas(8) Common::Vec2f proctex_noise_a;
alignas(8) Common::Vec2f proctex_noise_p;
alignas(16) Common::Vec3f lighting_global_ambient;
LightSrc light_src[8];
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
alignas(16) Common::Vec4f clip_coef;
};
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/**
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
* NOTE: the same rule from UniformData also applies here.
*/
struct PicaUniformsData {
void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup);
struct BoolAligned {
alignas(16) int b;
};
std::array<BoolAligned, 16> bools;
alignas(16) std::array<Common::Vec4u, 4> i;
alignas(16) std::array<Common::Vec4f, 96> f;
};
struct VSUniformData {
PicaUniformsData uniforms;
};
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData does not match the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
} // namespace Pica::Shader

View File

@ -227,14 +227,14 @@ void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> des
for (std::size_t i = 0; i < source.size(); i += 3) {
u32 bgr{};
std::memcpy(&bgr, source.data() + i, 3);
const u32 rgb = std::byteswap(bgr << 8);
const u32 rgb = Common::swap32(bgr << 8);
std::memcpy(dest.data(), &rgb, 3);
}
}
void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
u32 j = 0;
for (u32 i = 0; i < source.size(); i += 3) {
for (std::size_t i = 0; i < source.size(); i += 3) {
dest[j] = source[i + 2];
dest[j + 1] = source[i + 1];
dest[j + 2] = source[i];
@ -246,7 +246,7 @@ void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> de
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
for (u32 i = 0; i < source.size(); i += 4) {
const u32 abgr = *reinterpret_cast<const u32*>(source.data() + i);
const u32 rgba = std::byteswap(abgr);
const u32 rgba = Common::swap32(abgr);
std::memcpy(dest.data() + i, &rgba, 4);
}
}

View File

@ -11,6 +11,7 @@
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/video_core.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -44,15 +45,26 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondar
g_memory = &memory;
Pica::Init();
OpenGL::GLES = Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::OpenGLES;
const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue();
switch (graphics_api) {
case Settings::GraphicsAPI::OpenGL:
case Settings::GraphicsAPI::OpenGLES:
OpenGL::GLES = graphics_api == Settings::GraphicsAPI::OpenGLES;
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window, secondary_window);
ResultStatus result = g_renderer->Init();
break;
case Settings::GraphicsAPI::Vulkan:
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
break;
default:
LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api);
UNREACHABLE();
}
ResultStatus result = g_renderer->Init();
if (result != ResultStatus::Success) {
LOG_ERROR(Render, "initialization failed !");
LOG_ERROR(Render, "Video core initialization failed");
} else {
LOG_DEBUG(Render, "initialized OK");
LOG_INFO(Render, "Video core initialization OK");
}
return result;

View File

@ -3,8 +3,8 @@
// Refer to the license.txt file included.
#include <system_error>
#include <jwt/jwt.hpp>
#include "common/logging/log.h"
#include <jwt/jwt.hpp>
#include "common/web_result.h"
#include "web_service/verify_user_jwt.h"
#include "web_service/web_backend.h"