diff --git a/.gitmodules b/.gitmodules index b45d17bdb..17b412201 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,9 @@ [submodule "vulkan-headers"] path = externals/vulkan-headers url = https://github.com/KhronosGroup/Vulkan-Headers +[submodule "glslang"] + path = externals/glslang + url = https://github.com/KhronosGroup/glslang +[submodule "glm"] + path = externals/glm + url = https://github.com/g-truc/glm diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d98b74fa..1bb572d7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ cmake_policy(SET CMP0069 NEW) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") include(DownloadExternals) +include(GNUInstallDirs) include(CMakeDependentOption) project(citra LANGUAGES C CXX ASM) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index c2385c48e..2efd618f9 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -60,6 +60,16 @@ endif() # Glad add_subdirectory(glad) +# glslang +set(SKIP_GLSLANG_INSTALL ON) +set(ENABLE_GLSLANG_BINARIES OFF) +set(ENABLE_SPVREMAPPER OFF) +set(ENABLE_CTEST OFF) +add_subdirectory(glslang) + +# glm +add_subdirectory(glm) + # inih add_subdirectory(inih) diff --git a/externals/glm b/externals/glm new file mode 160000 index 000000000..cc98465e3 --- /dev/null +++ b/externals/glm @@ -0,0 +1 @@ +Subproject commit cc98465e3508535ba8c7f6208df934c156a018dc diff --git a/externals/glslang b/externals/glslang new file mode 160000 index 000000000..c0cf8ad87 --- /dev/null +++ b/externals/glslang @@ -0,0 +1 @@ +Subproject commit c0cf8ad87627227c7f1d3be6177bbf2832d3fc1b diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f59607d66..8431237a0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -122,6 +122,7 @@ else() if (MINGW) add_definitions(-DMINGW_HAS_SECURE_API) + add_compile_options("-Wa,-mbig-obj") if (COMPILE_WITH_DWARF) 
add_compile_options("-gdwarf") endif() diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index 120a320ec..01496953e 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -269,6 +269,10 @@ target_link_libraries(citra-qt PRIVATE audio_core common core input_common netwo target_link_libraries(citra-qt PRIVATE Boost::boost glad nihstro-headers Qt5::Widgets Qt5::Multimedia) target_link_libraries(citra-qt PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +if (NOT WIN32) + target_include_directories(citra-qt PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) +endif() + target_compile_definitions(citra-qt PRIVATE # Use QStringBuilder for string concatenation to reduce # the overall number of temporary strings created. diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 3f0cba65f..8401d40b8 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -25,6 +25,10 @@ #include "video_core/renderer_base.h" #include "video_core/video_core.h" +#if !defined(WIN32) +#include +#endif + EmuThread::EmuThread(Frontend::GraphicsContext& core_context) : core_context(core_context) {} EmuThread::~EmuThread() = default; @@ -53,6 +57,7 @@ void EmuThread::run() { }); emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); + emit HideLoadingScreen(); core_context.MakeCurrent(); @@ -303,6 +308,40 @@ public: } }; +static Frontend::WindowSystemType GetWindowSystemType() { + // Determine WSI type based on Qt platform. 
+ QString platform_name = QGuiApplication::platformName(); + if (platform_name == QStringLiteral("windows")) + return Frontend::WindowSystemType::Windows; + else if (platform_name == QStringLiteral("xcb")) + return Frontend::WindowSystemType::X11; + else if (platform_name == QStringLiteral("wayland")) + return Frontend::WindowSystemType::Wayland; + + LOG_CRITICAL(Frontend, "Unknown Qt platform!"); + return Frontend::WindowSystemType::Windows; +} + +static Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { + Frontend::EmuWindow::WindowSystemInfo wsi; + wsi.type = GetWindowSystemType(); + + // Our Win32 Qt external doesn't have the private API. +#if defined(WIN32) || defined(__APPLE__) + wsi.render_surface = window ? reinterpret_cast(window->winId()) : nullptr; +#else + QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); + wsi.display_connection = pni->nativeResourceForWindow("display", window); + if (wsi.type == Frontend::WindowSystemType::Wayland) + wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; + else + wsi.render_surface = window ? reinterpret_cast(window->winId()) : nullptr; +#endif + wsi.render_surface_scale = window ? 
static_cast(window->devicePixelRatio()) : 1.0f; + + return wsi; +} + GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread, bool is_secondary_) : QWidget(parent_), EmuWindow(is_secondary_), emu_thread(emu_thread) { @@ -539,6 +578,9 @@ bool GRenderWindow::InitRenderTarget() { break; } + // Update the Window System information with the new render target + window_info = GetWindowSystemInfo(child_widget->windowHandle()); + child_widget->resize(Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight); layout()->addWidget(child_widget); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 12da2a743..7f83d3a90 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -76,6 +76,7 @@ void ConfigureGraphics::SetConfiguration() { ui->toggle_accurate_mul->setChecked(Settings::values.shaders_accurate_mul.GetValue()); ui->toggle_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new.GetValue()); + ui->graphics_api_combo->setCurrentIndex(static_cast(Settings::values.graphics_api.GetValue())); if (Settings::IsConfiguringGlobal()) { ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue()); @@ -95,6 +96,7 @@ void ConfigureGraphics::ApplyConfiguration() { ui->toggle_disk_shader_cache, use_disk_shader_cache); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync_new, ui->toggle_vsync_new, use_vsync_new); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.graphics_api, ui->graphics_api_combo); if (Settings::IsConfiguringGlobal()) { Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked(); diff --git a/src/common/hash.h b/src/common/hash.h index 079039cfc..44da9e25d 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -6,6 +6,7 @@ #include #include +#include #include 
"common/cityhash.h" #include "common/common_types.h" @@ -41,6 +42,13 @@ inline u64 HashCombine(std::size_t& seed, const u64 hash) { return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); } +template +struct IdentityHash { + T operator()(const T& value) const { + return value; + } +}; + /// A helper template that ensures the padding in a struct is initialized by memsetting to 0. template struct HashableStruct { diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index c01c8a376..be086eb0c 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -236,6 +236,7 @@ void DebuggerBackend::Write(const Entry& entry) { CLS(Render) \ SUB(Render, Software) \ SUB(Render, OpenGL) \ + SUB(Render, Vulkan) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/common/logging/formatter.h b/src/common/logging/formatter.h index ad6adb143..3d4d70902 100644 --- a/src/common/logging/formatter.h +++ b/src/common/logging/formatter.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include // adapted from https://github.com/fmtlib/fmt/issues/2704 // a generic formatter for enum classes diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 1109ce7c3..8ff72e68b 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" #include "common/logging/formatter.h" + namespace Log { // trims up to and including the last of ../, ..\, src/, src\ in a string @@ -103,6 +104,7 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend + Render_Vulkan, ///< Vulkan backend Audio, ///< Audio emulation Audio_DSP, ///< The HLE and LLE implementations of the DSP Audio_Sink, ///< Emulator audio output backend diff --git a/src/common/math_util.h b/src/common/math_util.h index 1e2cc546b..ad6a21c5a 100644 --- a/src/common/math_util.h +++ 
b/src/common/math_util.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include namespace Common { diff --git a/src/common/settings.h b/src/common/settings.h index 9b0559185..d6a780d3f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -446,7 +446,7 @@ struct Values { Setting allow_plugin_loader{true, "allow_plugin_loader"}; // Renderer - SwitchableSetting graphics_api{GraphicsAPI::OpenGL, "graphics_api"}; + SwitchableSetting graphics_api{GraphicsAPI::Vulkan, "graphics_api"}; SwitchableSetting use_hw_renderer{true, "use_hw_renderer"}; SwitchableSetting use_hw_shader{true, "use_hw_shader"}; SwitchableSetting separable_shader{false, "use_separable_shader"}; diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index d0cd341ff..98b7dd79d 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -14,6 +14,15 @@ namespace Frontend { +/// Information for the Graphics Backends signifying what type of screen pointer is in +/// WindowInformation +enum class WindowSystemType : u8 { + Headless, + Windows, + X11, + Wayland, +}; + struct Frame; /** * For smooth Vsync rendering, we want to always present the latest frame that the core generates, @@ -122,6 +131,23 @@ public: Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight}; }; + /// Data describing host window system information + struct WindowSystemInfo { + // Window system type. Determines which GL context or Vulkan WSI is used. + WindowSystemType type = WindowSystemType::Headless; + + // Connection to a display server. This is used on X11 and Wayland platforms. + void* display_connection = nullptr; + + // Render surface. This is a pointer to the native window handle, which depends + // on the platform. e.g. HWND for Windows, Window for X11. If the surface is + // set to nullptr, the video backend will run in headless mode. + void* render_surface = nullptr; + + // Scale of the render surface. For hidpi systems, this will be >1. 
+ float render_surface_scale = 1.0f; + }; + /// Polls window events virtual void PollEvents() = 0; @@ -185,6 +211,13 @@ public: config = val; } + /** + * Returns system information about the drawing area. + */ + const WindowSystemInfo& GetWindowInfo() const { + return window_info; + } + /** * Gets the framebuffer layout (width, height, and screen regions) * @note This method is thread-safe @@ -233,6 +266,7 @@ protected: } bool is_secondary{}; + WindowSystemInfo window_info; private: /** diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5b52f4e38..3b1c6ba93 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -595,33 +595,6 @@ bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const { return GetPhysicalRef(paddr); } -PAddr MemorySystem::ClampPhysicalAddress(PAddr base, PAddr address) const { - struct MemoryArea { - PAddr paddr_base; - u32 size; - }; - - constexpr std::array memory_areas = { - MemoryArea{VRAM_PADDR, VRAM_SIZE}, - MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE}, - MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE}, - MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE}, - }; - - const auto area = - std::ranges::find_if(memory_areas, [&](const MemoryArea& area) { - return base >= area.paddr_base && base <= area.paddr_base + area.size; - }); - - if (area == memory_areas.end()) { - LOG_ERROR(HW_Memory, "Unknown base address used for clamping {:#08X} at PC {:#08X}", base, - Core::GetRunningCore().GetPC()); - return address; - } - - return std::clamp(address, area->paddr_base, area->paddr_base + area->size); -} - u8* MemorySystem::GetPointer(const VAddr vaddr) { u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS]; if (page_pointer) { diff --git a/src/core/memory.h b/src/core/memory.h index f05632754..eac9fb374 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -587,9 +587,6 @@ public: /// Returns true if the address refers to a valid memory region bool IsValidPhysicalAddress(PAddr paddr) const; - /// Clamps 
the address to the boundaries of the memory region pointed by base - PAddr ClampPhysicalAddress(PAddr base, PAddr address) const; - /// Gets offset in FCRAM from a pointer inside FCRAM range u32 GetFCRAMOffset(const u8* pointer) const; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index bef0b3fc0..c6259968c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -85,11 +85,41 @@ add_library(video_core STATIC #temporary, move these back in alphabetical order before merging renderer_opengl/gl_format_reinterpreter.cpp renderer_opengl/gl_format_reinterpreter.h + renderer_vulkan/pica_to_vk.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_common.cpp + renderer_vulkan/vk_common.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_instance.cpp + renderer_vulkan/vk_instance.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_platform.cpp + renderer_vulkan/vk_platform.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h + renderer_vulkan/vk_shader_gen.cpp + renderer_vulkan/vk_shader_gen.h + renderer_vulkan/vk_shader.cpp + renderer_vulkan/vk_shader.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_task_scheduler.cpp + renderer_vulkan/vk_task_scheduler.h + renderer_vulkan/vk_texture_runtime.cpp + renderer_vulkan/vk_texture_runtime.h shader/debug_data.h shader/shader.cpp shader/shader.h + shader/shader_cache.h shader/shader_interpreter.cpp shader/shader_interpreter.h + shader/shader_uniforms.cpp + shader/shader_uniforms.h swrasterizer/clipper.cpp swrasterizer/clipper.h swrasterizer/framebuffer.cpp @@ -160,8 +190,11 @@ endif() create_target_directory_groups(video_core) +# Include Vulkan headers +target_include_directories(video_core PRIVATE 
../../externals/vulkan-headers/include) +target_include_directories(video_core PRIVATE ../../externals/vma) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad nihstro-headers Boost::serialization) +target_link_libraries(video_core PRIVATE glad glm::glm SPIRV glslang nihstro-headers Boost::serialization) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if (ARCHITECTURE_x86_64) diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index d2f8874fe..88382d904 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -40,7 +40,7 @@ void Zero(T& o) { State::State() : geometry_pipeline(*this) { auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { using Pica::Shader::OutputVertex; - auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, + auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index 7066c53a9..108c95924 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -136,7 +136,7 @@ inline void MortonCopyTile(u32 stride, std::span tile_buffer, std::sp } template -static void MortonCopy(u32 stride, u32 height, u32 start_offset, +static void MortonCopy(u32 stride, u32 height, u32 start_offset, u32 end_offset, std::span linear_buffer, std::span tiled_buffer) { @@ -148,7 +148,6 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, // becomes zero for 4-bit textures! constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel; - const u32 end_offset = start_offset + static_cast(tiled_buffer.size()); // Does this line have any significance? 
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel; @@ -216,7 +215,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, } } -using MortonFunc = void (*)(u32, u32, u32, std::span, std::span); +using MortonFunc = void (*)(u32, u32, u32, u32, std::span, std::span); static constexpr std::array UNSWIZZLE_TABLE = { MortonCopy, // 0 diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 0144686e3..7509a0f6e 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/alignment.h" #include "common/logging/log.h" @@ -46,9 +47,9 @@ class RasterizerAccelerated; template class RasterizerCache : NonCopyable { public: - using TextureRuntime = typename T::Runtime; - using Surface = std::shared_ptr; - using Watcher = SurfaceWatcher; + using TextureRuntime = typename T::RuntimeType; + using Surface = std::shared_ptr; + using Watcher = SurfaceWatcher; private: /// Declare rasterizer interval types @@ -754,7 +755,7 @@ auto RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& confi params.type = SurfaceType::Fill; params.res_scale = std::numeric_limits::max(); - Surface new_surface = std::make_shared(params, runtime); + Surface new_surface = std::make_shared(params, runtime); std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); if (config.fill_32bit) { @@ -893,32 +894,23 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i ASSERT(load_start >= surface->addr && load_end <= surface->end); const auto& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); + surface->width * surface->height * 4, true); MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); if (!source_ptr) [[unlikely]] { return; } const auto upload_data = 
source_ptr.GetWriteBytes(load_end - load_start); - const u32 start_offset = load_start - surface->addr; - const u32 upload_size = static_cast(upload_data.size()); MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); - /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); - } else { - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); - }*/ - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + if (surface->is_tiled) { + std::vector unswizzled_data(staging.size); + UnswizzleTexture(*surface, load_start - surface->addr, load_end - surface->addr, + upload_data, unswizzled_data); + runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped); } else { - UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); + runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped); } const BufferTextureCopy upload = { @@ -939,7 +931,7 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval ASSERT(flush_start >= surface->addr && flush_end <= surface->end); const auto& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); + surface->width * surface->height * 4, false); const SurfaceParams params = surface->FromInterval(interval); const BufferTextureCopy download = { .buffer_offset = 0, @@ -956,25 +948,16 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval } const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - const u32 start_offset = flush_start - surface->addr; - const u32 download_size = 
static_cast(download_dest.size()); MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto download_data = staging.mapped.subspan(start_offset, download_size); - /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(download_data, download_dest); - } else { - std::memcpy(download_dest.data(), download_data.data(), download_size); - }*/ - std::memcpy(download_dest.data(), download_data.data(), download_size); + if (surface->is_tiled) { + std::vector swizzled_data(staging.size); + runtime.FormatConvert(surface->pixel_format, false, swizzled_data, swizzled_data); + SwizzleTexture(*surface, flush_start - surface->addr, flush_end - surface->addr, + staging.mapped, download_dest); } else { - SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); + runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest); } } @@ -1228,7 +1211,7 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& r template auto RasterizerCache::CreateSurface(SurfaceParams& params) -> Surface { - Surface surface = std::make_shared(params, runtime); + Surface surface = std::make_shared(params, runtime); surface->invalid_regions.insert(surface->GetInterval()); return surface; diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index c8e4ab1f2..586565b8d 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -3,30 +3,26 @@ // Refer to the license.txt file included. 
#pragma once -#include #include "common/assert.h" -#include "core/memory.h" #include "video_core/texture/texture_decode.h" #include "video_core/rasterizer_cache/morton_swizzle.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/utils.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/video_core.h" namespace VideoCore { -void SwizzleTexture(const SurfaceParams& params, u32 start_offset, +void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset, std::span source_linear, std::span dest_tiled) { const u32 func_index = static_cast(params.pixel_format); const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index]; - SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled); + SwizzleImpl(params.stride, params.height, start_offset, end_offset, source_linear, dest_tiled); } -void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, +void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset, std::span source_tiled, std::span dest_linear) { const u32 func_index = static_cast(params.pixel_format); const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index]; - UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled); + UnswizzleImpl(params.stride, params.height, start_offset, end_offset, dest_linear, source_tiled); } ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) { @@ -68,4 +64,4 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d return result; } -} // namespace OpenGL +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index e9528e9e7..1de5faca1 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -14,7 +14,6 @@ struct HostTextureTag { PixelFormat format{}; u32 width = 0; u32 height = 0; - u32 levels = 1; u32 
layers = 1; auto operator<=>(const HostTextureTag&) const noexcept = default; @@ -45,7 +44,7 @@ class SurfaceParams; [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); -void SwizzleTexture(const SurfaceParams& params, u32 start_offset, +void SwizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset, std::span source_linear, std::span dest_tiled); /** @@ -56,7 +55,7 @@ void SwizzleTexture(const SurfaceParams& params, u32 start_offset, * @param source_tiled The source morton swizzled data. * @param dest_linear The output buffer where the generated linear data will be written to. */ -void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, +void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, u32 end_offset, std::span source_tiled, std::span dest_linear); } // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a17024a50..0d9a825eb 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,10 +9,6 @@ #include "common/common_types.h" #include "core/hw/gpu.h" -namespace OpenGL { -struct ScreenInfo; -} - namespace Pica::Shader { struct OutputVertex; } // namespace Pica::Shader @@ -73,13 +69,6 @@ public: return false; } - /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, - PAddr framebuffer_addr, u32 pixel_stride, - OpenGL::ScreenInfo& screen_info) { - return false; - } - /// Attempt to draw using hardware shaders virtual bool AccelerateDrawBatch(bool is_indexed) { return false; diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index 3ee18294a..21fcd64ec 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h @@ -159,6 +159,7 @@ struct FramebufferRegs { } stencil_test; union { + u32 depth_color_mask; BitField<0, 
1, u32> depth_test_enable; BitField<4, 3, CompareFunc> depth_test_func; BitField<8, 1, u32> red_enable; diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 94b9f7502..ee4835de8 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -6,8 +6,7 @@ #include #include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" +#include "common/vector_math.h" #include "video_core/pica_types.h" namespace Pica { @@ -18,7 +17,7 @@ struct RasterizerRegs { KeepAll = 0, KeepClockWise = 1, KeepCounterClockWise = 2, - // TODO: What does the third value imply? + KeepAll2 = 3 }; union { diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 17a9bb72c..68d1e964e 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -21,7 +21,3 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) { update_layout(*secondary_window); } } - -void RendererBase::Sync() { - rasterizer->SyncEntireState(); -} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 2a004ad99..55606da99 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -21,6 +21,9 @@ public: /// Initialize the renderer virtual VideoCore::ResultStatus Init() = 0; + /// Returns the rasterizer owned by the renderer + virtual VideoCore::RasterizerInterface* Rasterizer() = 0; + /// Shutdown the renderer virtual void ShutDown() = 0; @@ -40,6 +43,8 @@ public: /// Cleans up after video dumping is ended virtual void CleanupVideoDumping() = 0; + virtual void Sync() = 0; + /// Updates the framebuffer layout of the contained render window handle. 
void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {}); @@ -54,10 +59,6 @@ public: return m_current_frame; } - VideoCore::RasterizerInterface* Rasterizer() const { - return rasterizer.get(); - } - Frontend::EmuWindow& GetRenderWindow() { return render_window; } @@ -66,12 +67,9 @@ public: return render_window; } - void Sync(); - protected: Frontend::EmuWindow& render_window; ///< Reference to the render window handle. Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle. - std::unique_ptr rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer }; diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 9dd6cbb44..83dd57718 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -112,14 +112,15 @@ void Driver::ReportDriverInfo() { } void Driver::DeduceVendor() { - if (gpu_vendor.contains("NVIDIA")) { + if (gpu_vendor.find("NVIDIA") != gpu_vendor.npos) { vendor = Vendor::Nvidia; - } else if (gpu_vendor.contains("ATI") || - gpu_vendor.contains("Advanced Micro Devices")) { + } else if ((gpu_vendor.find("ATI") != gpu_vendor.npos) || + (gpu_vendor.find("AMD") != gpu_vendor.npos) || + (gpu_vendor.find("Advanced Micro Devices") != gpu_vendor.npos)) { vendor = Vendor::AMD; - } else if (gpu_vendor.contains("Intel")) { + } else if (gpu_vendor.find("Intel") != gpu_vendor.npos) { vendor = Vendor::Intel; - } else if (gpu_vendor.contains("GDI Generic")) { + } else if (gpu_vendor.find("GDI Generic") != gpu_vendor.npos) { vendor = Vendor::Generic; } } diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index d5202169f..7b91fbd58 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ 
b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -243,17 +243,12 @@ private: }; FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { - const std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - const std::string_view version{reinterpret_cast(glGetString(GL_VERSION))}; - auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { const u32 dst_index = static_cast(dest); return reinterpreters[dst_index].push_back(std::move(obj)); }; Register(VideoCore::PixelFormat::RGBA8, std::make_unique()); - LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); - Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index eaf1751a3..f563c429c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -20,6 +20,9 @@ class EmuWindow; } namespace OpenGL { + +struct ScreenInfo; + class Driver; class ShaderProgramManager; @@ -43,7 +46,7 @@ public: bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) override; + u32 pixel_stride, ScreenInfo& screen_info); bool AccelerateDrawBatch(bool is_indexed) override; /// Syncs entire status to match PICA registers diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 66883e7e6..c0a2cf94a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -41,10 +41,12 @@ struct LightSrc { float dist_atten_scale; }; -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the 
end. The GL spec is not clear wether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. +/** + * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + * NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at + * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + * Not following that rule will cause problems on some AMD drivers. + */ struct UniformData { int framebuffer_scale; int alphatest_ref; @@ -81,8 +83,10 @@ static_assert(sizeof(UniformData) == 0x4F0, static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); -/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. -// NOTE: the same rule from UniformData also applies here. +/** + * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. + * NOTE: the same rule from UniformData also applies here. 
+ */ struct PicaUniformsData { void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index ec6625731..6230f7037 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -124,6 +124,17 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f return DEFAULT_TUPLE; } +void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest) { + if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) { + Pica::Texture::ConvertABGRToRGBA(source, dest); + } else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) { + Pica::Texture::ConvertBGRToRGB(source, dest); + } else { + std::memcpy(dest.data(), source.data(), source.size()); + } +} + OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { @@ -302,9 +313,20 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); } -MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); +Surface::~Surface() { + const VideoCore::HostTextureTag tag = { + .format = pixel_format, + .width = GetScaledWidth(), + .height = GetScaledHeight(), + .layers = texture_type == VideoCore::TextureType::CubeMap ? 
6u : 1u + }; + + runtime.texture_recycler.emplace(tag, std::move(texture)); +} + +MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64)); void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureUL); + MICROPROFILE_SCOPE(OpenGL_Upload); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); @@ -327,8 +349,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu upload.texture_rect.left, upload.texture_rect.bottom, upload.texture_rect.GetWidth(), upload.texture_rect.GetHeight(), - tuple.format, tuple.type, - reinterpret_cast(upload.buffer_offset)); + tuple.format, tuple.type, 0); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -339,9 +360,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu InvalidateAllWatcher(); } -MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); +MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64)); void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureDL); + MICROPROFILE_SCOPE(OpenGL_Download); // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); @@ -361,7 +382,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi const auto& tuple = runtime.GetFormatTuple(pixel_format); glReadPixels(download.texture_rect.left, download.texture_rect.bottom, download.texture_rect.GetWidth(), download.texture_rect.GetHeight(), - tuple.format, tuple.type, reinterpret_cast(download.buffer_offset)); + tuple.format, tuple.type, 0); } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -390,11 +411,9 @@ void Surface::ScaledDownload(const 
VideoCore::BufferTextureCopy& download) { if (driver.IsOpenGLES()) { const auto& downloader_es = runtime.GetDownloaderES(); downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect_height, rect_width, - reinterpret_cast(download.buffer_offset)); + rect_height, rect_width, 0); } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - reinterpret_cast(download.buffer_offset)); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0); } } @@ -409,7 +428,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { const auto& tuple = runtime.GetFormatTuple(pixel_format); glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, - tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); + tuple.format, tuple.type, 0); const auto scaled_rect = upload.texture_rect * res_scale; const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 493d49b7b..7843e495c 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -70,6 +70,10 @@ public: /// Returns the OpenGL format tuple associated with the provided pixel format const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + /// Performs required format convertions on the staging data + void FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest); + /// Allocates an OpenGL texture with the specified dimentions and format OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type); @@ -124,7 +128,7 @@ private: class Surface : public VideoCore::SurfaceBase { public: Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); - ~Surface() override = default; + ~Surface() override; /// Uploads pixel data in staging to a rectangle 
region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging); @@ -148,8 +152,8 @@ public: }; struct Traits { - using Runtime = TextureRuntime; - using Surface = Surface; + using RuntimeType = TextureRuntime; + using SurfaceType = Surface; }; using RasterizerCache = VideoCore::RasterizerCache; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 609309585..3f7910b7a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -15,7 +15,6 @@ #include "core/memory.h" #include "core/tracer/recorder.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state.h" @@ -383,6 +382,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() { return VideoCore::ResultStatus::Success; } +VideoCore::RasterizerInterface* RendererOpenGL::Rasterizer() { + return rasterizer.get(); +} + /// Shutdown the renderer void RendererOpenGL::ShutDown() {} @@ -580,7 +583,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. 
ASSERT(pixel_stride % 4 == 0); - if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, + if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; @@ -1214,4 +1217,8 @@ void RendererOpenGL::CleanupVideoDumping() { mailbox->free_cv.notify_one(); } +void RendererOpenGL::Sync() { + rasterizer->SyncEntireState(); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 9f5ac66c6..c530861da 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,18 +55,16 @@ struct PresentationTexture { OGLTexture texture; }; +class RasterizerOpenGL; + class RendererOpenGL : public RendererBase { public: explicit RendererOpenGL(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window); ~RendererOpenGL() override; - /// Initialize the renderer VideoCore::ResultStatus Init() override; - - /// Shutdown the renderer + VideoCore::RasterizerInterface* Rasterizer() override; void ShutDown() override; - - /// Finalizes rendering the guest frame void SwapBuffers() override; /// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this @@ -75,9 +73,8 @@ public: /// Prepares for video dumping (e.g. 
create necessary buffers, etc) void PrepareVideoDumping() override; - - /// Cleans up after video dumping is ended void CleanupVideoDumping() override; + void Sync() override; private: void InitOpenGLObjects(); @@ -108,6 +105,7 @@ private: private: Driver driver; OpenGLState state; + std::unique_ptr rasterizer; // OpenGL object IDs OGLVertexArray vertex_array; diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h new file mode 100644 index 000000000..6cbf3ecfa --- /dev/null +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -0,0 +1,278 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "common/logging/log.h" +#include "core/core.h" +#include "video_core/regs.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace PicaToVK { + +using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter; + +struct FilterInfo { + vk::Filter mag_filter, min_filter; + vk::SamplerMipmapMode mip_mode; +}; + +inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) { + constexpr std::array filter_table = { + vk::Filter::eNearest, + vk::Filter::eLinear + }; + + constexpr std::array mipmap_table = { + vk::SamplerMipmapMode::eNearest, + vk::SamplerMipmapMode::eLinear + }; + + return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)}; +} + +inline vk::Filter TextureFilterMode(TextureFilter mode) { + switch (mode) { + case TextureFilter::Linear: + return vk::Filter::eLinear; + case TextureFilter::Nearest: + return vk::Filter::eNearest; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown texture filtering mode {}", mode); + UNIMPLEMENTED(); + } + + return vk::Filter::eLinear; +} + +inline vk::SamplerMipmapMode TextureMipFilterMode(TextureFilter mip) { + switch (mip) { + case TextureFilter::Linear: + return vk::SamplerMipmapMode::eLinear; + case 
TextureFilter::Nearest: + return vk::SamplerMipmapMode::eNearest; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown texture mipmap filtering mode {}", mip); + UNIMPLEMENTED(); + } + + return vk::SamplerMipmapMode::eLinear; +} + +inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { + static constexpr std::array wrap_mode_table{{ + vk::SamplerAddressMode::eClampToEdge, + vk::SamplerAddressMode::eClampToBorder, + vk::SamplerAddressMode::eRepeat, + vk::SamplerAddressMode::eMirroredRepeat, + // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the + // comments in enum WrapMode. + vk::SamplerAddressMode::eClampToEdge, + vk::SamplerAddressMode::eClampToBorder, + vk::SamplerAddressMode::eRepeat, + vk::SamplerAddressMode::eRepeat, + }}; + + const auto index = static_cast(mode); + + // Range check table for input + if (index >= wrap_mode_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown texture wrap mode {}", index); + UNREACHABLE(); + + return vk::SamplerAddressMode::eClampToEdge; + } + + if (index > 3) { + Core::System::GetInstance().TelemetrySession().AddField( + Common::Telemetry::FieldType::Session, "VideoCore_Pica_UnsupportedTextureWrapMode", + static_cast(index)); + LOG_WARNING(Render_Vulkan, "Using texture wrap mode {}", index); + } + + return wrap_mode_table[index]; +} + +inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { + static constexpr std::array blend_equation_table{{ + vk::BlendOp::eAdd, + vk::BlendOp::eSubtract, + vk::BlendOp::eReverseSubtract, + vk::BlendOp::eMin, + vk::BlendOp::eMax, + }}; + + const auto index = static_cast(equation); + + // Range check table for input + if (index >= blend_equation_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown blend equation {}", index); + + // This return value is hwtested, not just a stub + return vk::BlendOp::eAdd; + } + + return blend_equation_table[index]; +} + +inline vk::BlendFactor 
BlendFunc(Pica::FramebufferRegs::BlendFactor factor) { + static constexpr std::array blend_func_table{{ + vk::BlendFactor::eZero, // BlendFactor::Zero + vk::BlendFactor::eOne, // BlendFactor::One + vk::BlendFactor::eSrcColor, // BlendFactor::SourceColor + vk::BlendFactor::eOneMinusSrcColor, // BlendFactor::OneMinusSourceColor + vk::BlendFactor::eDstColor, // BlendFactor::DestColor + vk::BlendFactor::eOneMinusDstColor, // BlendFactor::OneMinusDestColor + vk::BlendFactor::eSrcAlpha, // BlendFactor::SourceAlpha + vk::BlendFactor::eOneMinusSrcAlpha, // BlendFactor::OneMinusSourceAlpha + vk::BlendFactor::eDstAlpha, // BlendFactor::DestAlpha + vk::BlendFactor::eOneMinusDstAlpha, // BlendFactor::OneMinusDestAlpha + vk::BlendFactor::eConstantColor, // BlendFactor::ConstantColor + vk::BlendFactor::eOneMinusConstantColor,// BlendFactor::OneMinusConstantColor + vk::BlendFactor::eConstantAlpha, // BlendFactor::ConstantAlpha + vk::BlendFactor::eOneMinusConstantAlpha,// BlendFactor::OneMinusConstantAlpha + vk::BlendFactor::eSrcAlphaSaturate, // BlendFactor::SourceAlphaSaturate + }}; + + const auto index = static_cast(factor); + + // Range check table for input + if (index >= blend_func_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown blend factor {}", index); + UNREACHABLE(); + + return vk::BlendFactor::eOne; + } + + return blend_func_table[index]; +} + +inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) { + static constexpr std::array logic_op_table{{ + vk::LogicOp::eClear, // Clear + vk::LogicOp::eAnd, // And + vk::LogicOp::eAndReverse, // AndReverse + vk::LogicOp::eCopy, // Copy + vk::LogicOp::eSet, // Set + vk::LogicOp::eCopyInverted, // CopyInverted + vk::LogicOp::eNoOp, // NoOp + vk::LogicOp::eInvert, // Invert + vk::LogicOp::eNand, // Nand + vk::LogicOp::eOr, // Or + vk::LogicOp::eNor, // Nor + vk::LogicOp::eXor, // Xor + vk::LogicOp::eEquivalent, // Equiv + vk::LogicOp::eAndInverted, // AndInverted + vk::LogicOp::eOrReverse, // OrReverse + 
vk::LogicOp::eOrInverted, // OrInverted + }}; + + const auto index = static_cast(op); + + // Range check table for input + if (index >= logic_op_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown logic op {}", index); + UNREACHABLE(); + + return vk::LogicOp::eCopy; + } + + return logic_op_table[index]; +} + +inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) { + static constexpr std::array compare_func_table{{ + vk::CompareOp::eNever, // CompareFunc::Never + vk::CompareOp::eAlways, // CompareFunc::Always + vk::CompareOp::eEqual, // CompareFunc::Equal + vk::CompareOp::eNotEqual, // CompareFunc::NotEqual + vk::CompareOp::eLess, // CompareFunc::LessThan + vk::CompareOp::eLessOrEqual, // CompareFunc::LessThanOrEqual + vk::CompareOp::eGreater, // CompareFunc::GreaterThan + vk::CompareOp::eGreaterOrEqual, // CompareFunc::GreaterThanOrEqual + }}; + + const auto index = static_cast(func); + + // Range check table for input + if (index >= compare_func_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown compare function {}", index); + UNREACHABLE(); + + return vk::CompareOp::eAlways; + } + + return compare_func_table[index]; +} + +inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) { + static constexpr std::array stencil_op_table{{ + vk::StencilOp::eKeep, // StencilAction::Keep + vk::StencilOp::eZero, // StencilAction::Zero + vk::StencilOp::eReplace, // StencilAction::Replace + vk::StencilOp::eIncrementAndClamp, // StencilAction::Increment + vk::StencilOp::eDecrementAndClamp, // StencilAction::Decrement + vk::StencilOp::eInvert, // StencilAction::Invert + vk::StencilOp::eIncrementAndWrap, // StencilAction::IncrementWrap + vk::StencilOp::eDecrementAndWrap, // StencilAction::DecrementWrap + }}; + + const auto index = static_cast(action); + + // Range check table for input + if (index >= stencil_op_table.size()) { + LOG_CRITICAL(Render_Vulkan, "Unknown stencil op {}", index); + UNREACHABLE(); + + return vk::StencilOp::eKeep; 
+ } + + return stencil_op_table[index]; +} + +inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopology topology) { + switch (topology) { + case Pica::PipelineRegs::TriangleTopology::Fan: + return vk::PrimitiveTopology::eTriangleFan; + case Pica::PipelineRegs::TriangleTopology::List: + case Pica::PipelineRegs::TriangleTopology::Shader: + return vk::PrimitiveTopology::eTriangleList; + case Pica::PipelineRegs::TriangleTopology::Strip: + return vk::PrimitiveTopology::eTriangleStrip; + } +} + +inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { + switch (mode) { + case Pica::RasterizerRegs::CullMode::KeepAll: + case Pica::RasterizerRegs::CullMode::KeepAll2: + return vk::CullModeFlagBits::eNone; + case Pica::RasterizerRegs::CullMode::KeepClockWise: + case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: + return vk::CullModeFlagBits::eBack; + } +} + +inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { + switch (mode) { + case Pica::RasterizerRegs::CullMode::KeepAll: + case Pica::RasterizerRegs::CullMode::KeepAll2: + case Pica::RasterizerRegs::CullMode::KeepClockWise: + return vk::FrontFace::eCounterClockwise; + case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: + return vk::FrontFace::eClockwise; + } +} + +inline Common::Vec4f ColorRGBA8(const u32 color) { + const auto rgba = + Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF}; + return rgba / 255.0f; +} + +inline Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) { + return Common::Vec3u{color.r, color.g, color.b} / 255.0f; +} + +} // namespace PicaToGL diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 000000000..43f30deec --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,1055 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later 
version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#define GLM_FORCE_DEPTH_ZERO_TO_ONE +#include +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/frontend/emu_window.h" +#include "core/frontend/framebuffer_layout.h" +#include "core/hw/gpu.h" +#include "core/hw/hw.h" +#include "core/hw/lcd.h" +#include "core/settings.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/video_core.h" + +namespace Vulkan { + +constexpr std::string_view vertex_shader = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; + +// This is a truncated 3x3 matrix for 2D transformations: +// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. +// The third column performs translation. +// The third row could be used for projection, which we don't need in 2D. 
It hence is assumed to +// implicitly be [0, 0, 1] +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; +}; + +void main() { + vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix; + gl_Position = vec4(position.x, -position.y, 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} +)"; + +constexpr std::string_view fragment_shader = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; + int reverse_interlaced; +}; + +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform sampler screen_sampler; + +void main() { + color = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); +} +)"; + +constexpr std::string_view fragment_shader_anaglyph = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +// Anaglyph Red-Cyan shader based on Dubois algorithm +// Constants taken from the paper: +// "Conversion of a Stereo Pair to Anaglyph with +// the Least-Squares Projection Method" +// Eric Dubois, March 2009 +const mat3 l = mat3( 0.437, 0.449, 0.164, + -0.062,-0.062,-0.024, + -0.048,-0.050,-0.017); +const mat3 r = mat3(-0.011,-0.032,-0.007, + 0.377, 0.761, 0.009, + -0.026,-0.093, 1.234); + +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; + int reverse_interlaced; +}; + +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform 
sampler screen_sampler; + +void main() { + vec4 color_tex_l = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); + vec4 color_tex_r = texture(sampler2D(screen_textures[screen_id_r], screen_sampler), frag_tex_coord); + color = vec4(color_tex_l.rgb*l+color_tex_r.rgb*r, color_tex_l.a); +} +)"; + +constexpr std::string_view fragment_shader_interlaced = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; + int reverse_interlaced; +}; + +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform sampler screen_sampler; + +void main() { + float screen_row = o_resolution.x * frag_tex_coord.x; + if (int(screen_row) % 2 == reverse_interlaced) + color = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); + else + color = texture(sampler2D(screen_textures[screen_id_r], screen_sampler), frag_tex_coord); +} +)"; + + +/// Vertex structure that the drawn screen rectangles are composed of. +struct ScreenRectVertex { + ScreenRectVertex() = default; + ScreenRectVertex(float x, float y, float u, float v) : + position{Common::MakeVec(x, y)}, tex_coord{Common::MakeVec(u, v)} {} + + Common::Vec2f position; + Common::Vec2f tex_coord; +}; + +constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192; + +/** + * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. + * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. + * + * @param flipped Whether the frame should be flipped upside down. 
+ */ +static std::array MakeOrthographicMatrix(float width, float height, bool flipped) { + + std::array matrix; // Laid out in column-major order + + // Last matrix row is implicitly assumed to be [0, 0, 1]. + if (flipped) { + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = 2.f / height; matrix[5] = -1.f; + // clang-format on + } else { + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // clang-format on + } + + return matrix; +} + +RendererVulkan::RendererVulkan(Frontend::EmuWindow& window) + : RendererBase{window}, instance{window}, scheduler{instance}, renderpass_cache{instance, scheduler}, + runtime{instance, scheduler, renderpass_cache}, swapchain{instance, renderpass_cache}, + vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}} { + + auto& telemetry_session = Core::System::GetInstance().TelemetrySession(); + constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA"); + telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650"); + telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.1"); + + window.mailbox = nullptr; +} + +RendererVulkan::~RendererVulkan() { + vk::Device device = instance.GetDevice(); + device.waitIdle(); + + device.destroyPipelineLayout(present_pipeline_layout); + device.destroyShaderModule(present_vertex_shader); + device.destroyDescriptorSetLayout(present_descriptor_layout); + device.destroyDescriptorUpdateTemplate(present_update_template); + + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + device.destroyPipeline(present_pipelines[i]); + device.destroyShaderModule(present_shaders[i]); + } + + for (auto& sampler : present_samplers) { + device.destroySampler(sampler); + } + + for (auto& info : screen_infos) { + const 
VideoCore::HostTextureTag tag = { + .format = VideoCore::PixelFormatFromGPUPixelFormat(info.texture.format), + .width = info.texture.width, + .height = info.texture.height, + .layers = 1 + }; + + runtime.Recycle(tag, std::move(info.texture.alloc)); + } + + rasterizer.reset(); +} + +VideoCore::ResultStatus RendererVulkan::Init() { + CompileShaders(); + BuildLayouts(); + BuildPipelines(); + + // Create the rasterizer + rasterizer = std::make_unique(render_window, instance, scheduler, + runtime, renderpass_cache); + + return VideoCore::ResultStatus::Success; +} + +VideoCore::RasterizerInterface* RendererVulkan::Rasterizer() { + return rasterizer.get(); +} + +void RendererVulkan::ShutDown() {} + +void RendererVulkan::Sync() { + rasterizer->SyncEntireState(); +} + +void RendererVulkan::PrepareRendertarget() { + for (u32 i = 0; i < 3; i++) { + const u32 fb_id = i == 2 ? 1 : 0; + const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; + + // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 + u32 lcd_color_addr = + (fb_id == 0) ? 
LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); + lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; + LCD::Regs::ColorFill color_fill{0}; + LCD::Read(color_fill.raw, lcd_color_addr); + + if (color_fill.is_enabled) { + const vk::ClearColorValue clear_color = { + .float32 = std::array{ + color_fill.color_r / 255.0f, + color_fill.color_g / 255.0f, + color_fill.color_b / 255.0f, + 1.0f + } + }; + + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + TextureInfo& texture = screen_infos[i].texture; + runtime.Transition(command_buffer, texture.alloc, vk::ImageLayout::eTransferDstOptimal, 0, texture.alloc.levels); + command_buffer.clearColorImage(texture.alloc.image, vk::ImageLayout::eTransferDstOptimal, + clear_color, range); + } else { + TextureInfo& texture = screen_infos[i].texture; + if (texture.width != framebuffer.width || texture.height != framebuffer.height || + texture.format != framebuffer.color_format) { + + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. 
+ ConfigureFramebufferTexture(texture, framebuffer); + } + + LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); + + // Resize the texture in case the framebuffer size has changed + texture.width = framebuffer.width; + texture.height = framebuffer.height; + } + } +} + +void RendererVulkan::BeginRendering() { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[current_pipeline]); + + std::array present_textures; + for (std::size_t i = 0; i < screen_infos.size(); i++) { + const auto& info = screen_infos[i]; + present_textures[i] = vk::DescriptorImageInfo{ + .imageView = info.display_texture ? info.display_texture->image_view + : info.texture.alloc.image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }; + } + + present_textures[3] = vk::DescriptorImageInfo{ + .sampler = present_samplers[current_sampler] + }; + + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = scheduler.GetDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = &present_descriptor_layout + }; + + vk::Device device = instance.GetDevice(); + vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; + device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures[0]); + + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, + 0, 1, &set, 0, nullptr); + + const vk::ClearValue clear_value = { + .color = clear_color + }; + + const auto& layout = render_window.GetFramebufferLayout(); + const vk::RenderPassBeginInfo begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = swapchain.GetFramebuffer(), + .renderArea = vk::Rect2D{ + .offset = {0, 0}, + .extent = {layout.width, layout.height} + }, + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); +} + +void 
RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye) { + + if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) + right_eye = false; + + const PAddr framebuffer_addr = + framebuffer.active_fb == 0 + ? (!right_eye ? framebuffer.address_left1 : framebuffer.address_right1) + : (!right_eye ? framebuffer.address_left2 : framebuffer.address_right2); + + LOG_TRACE(Render_Vulkan, "0x{:08x} bytes from 0x{:08x}({}x{}), fmt {:x}", + framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), + framebuffer.height.Value(), framebuffer.format); + + int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + std::size_t pixel_stride = framebuffer.stride / bpp; + + // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately + ASSERT(pixel_stride * bpp == framebuffer.stride); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default + // only allows rows to have a memory alignement of 4. 
+ ASSERT(pixel_stride % 4 == 0); + + if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { + ASSERT(false); + // Reset the screen info's display texture to its own permanent texture + /*screen_info.display_texture = &screen_info.texture; + screen_info.display_texcoords = Common::Rectangle(0.f, 0.f, 1.f, 1.f); + + Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); + + vk::Rect2D region{{0, 0}, {framebuffer.width, framebuffer.height}}; + std::span framebuffer_data(VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr), + screen_info.texture.GetSize()); + + screen_info.texture.Upload(0, 1, pixel_stride, region, framebuffer_data);*/ + } +} + +void RendererVulkan::CompileShaders() { + vk::Device device = instance.GetDevice(); + present_vertex_shader = Compile(vertex_shader, vk::ShaderStageFlagBits::eVertex, + device, ShaderOptimization::Debug); + present_shaders[0] = Compile(fragment_shader, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + present_shaders[1] = Compile(fragment_shader_anaglyph, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + present_shaders[2] = Compile(fragment_shader_interlaced, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + + auto properties = instance.GetPhysicalDevice().getProperties(); + for (std::size_t i = 0; i < present_samplers.size(); i++) { + const vk::Filter filter_mode = i == 0 ? 
vk::Filter::eLinear : vk::Filter::eNearest; + const vk::SamplerCreateInfo sampler_info = { + .magFilter = filter_mode, + .minFilter = filter_mode, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = true, + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false + }; + + present_samplers[i] = device.createSampler(sampler_info); + } +} + +void RendererVulkan::BuildLayouts() { + const std::array present_layout_bindings = { + vk::DescriptorSetLayoutBinding{ + .binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 3, + .stageFlags = vk::ShaderStageFlagBits::eFragment + }, + vk::DescriptorSetLayoutBinding{ + .binding = 1, + .descriptorType = vk::DescriptorType::eSampler, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eFragment + } + }; + + const vk::DescriptorSetLayoutCreateInfo present_layout_info = { + .bindingCount = static_cast(present_layout_bindings.size()), + .pBindings = present_layout_bindings.data() + }; + + vk::Device device = instance.GetDevice(); + present_descriptor_layout = device.createDescriptorSetLayout(present_layout_info); + + const std::array update_template_entries = { + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 3, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo) + }, + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .offset = 3 * sizeof(vk::DescriptorImageInfo), + .stride = 0 + } + }; + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = 
static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = present_descriptor_layout + }; + + present_update_template = device.createDescriptorUpdateTemplate(template_info); + + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .offset = 0, + .size = sizeof(PresentUniformData), + }; + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1, + .pSetLayouts = &present_descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range + }; + + present_pipeline_layout = device.createPipelineLayout(layout_info); +} + +void RendererVulkan::BuildPipelines() { + const vk::VertexInputBindingDescription binding = { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = vk::VertexInputRate::eVertex + }; + + const std::array attributes = { + vk::VertexInputAttributeDescription{ + .location = 0, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, position) + }, + vk::VertexInputAttributeDescription{ + .location = 1, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, tex_coord) + } + }; + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &binding, + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data() + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleStrip, + .primitiveRestartEnable = false + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = vk::CullModeFlagBits::eNone, + .frontFace = 
vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f} + }; + + const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &placeholder_viewport, + .scissorCount = 1, + .pScissors = &placeholder_scissor, + }; + + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data() + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = false, + .depthWriteEnable = false, + .depthCompareOp = vk::CompareOp::eAlways, + .depthBoundsTestEnable = false, + .stencilTestEnable = false + }; + + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + const std::array shader_stages = { + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = present_vertex_shader, + .pName = "main" + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = present_shaders[i], + .pName = "main" + }, + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { 
+ .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = present_pipeline_layout, + .renderPass = renderpass_cache.GetPresentRenderpass() + }; + + vk::Device device = instance.GetDevice(); + if (const auto result = device.createGraphicsPipeline({}, pipeline_info); + result.result == vk::Result::eSuccess) { + present_pipelines[i] = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Unable to build present pipelines"); + UNREACHABLE(); + } + } +} + +void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer) { + TextureInfo old_texture = texture; + texture = TextureInfo { + .alloc = runtime.Allocate(framebuffer.width, framebuffer.height, + VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format), + VideoCore::TextureType::Texture2D), + .width = framebuffer.width, + .height = framebuffer.height, + .format = framebuffer.color_format, + }; + + // Recyle the old texture after allocation to avoid having duplicates of the same allocation in the recycler + if (old_texture.width != 0 && old_texture.height != 0) { + const VideoCore::HostTextureTag tag = { + .format = VideoCore::PixelFormatFromGPUPixelFormat(old_texture.format), + .width = old_texture.width, + .height = old_texture.height, + .layers = 1 + }; + + runtime.Recycle(tag, std::move(old_texture.alloc)); + } +} + +void RendererVulkan::ReloadSampler() { + current_sampler = !Settings::values.filter_mode; +} + +void RendererVulkan::ReloadPipeline() { + switch (Settings::values.render_3d) { + case Settings::StereoRenderOption::Anaglyph: + current_pipeline = 1; + break; + case 
Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: + current_pipeline = 2; + draw_info.reverse_interlaced = + Settings::values.render_3d == Settings::StereoRenderOption::ReverseInterlaced; + break; + default: + current_pipeline = 0; + break; + } +} + +void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h) { + auto& screen_info = screen_infos[screen_id]; + const auto& texcoords = screen_info.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.top, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.right}, + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + // As this is the "DrawSingleScreenRotated" function, the output resolution dimensions have been + // swapped. If a non-rotated draw-screen function were to be added for book-mode games, those + // should probably be set to the standard (w, h, 1.0 / w, 1.0 / h) ordering. 
+ const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info.texture.width); + const float height = static_cast(screen_info.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) { + auto& screen_info = screen_infos[screen_id]; + const auto& texcoords = screen_info.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x + w, y, texcoords.top, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.left}, + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info.texture.width); + const float height = static_cast(screen_info.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id; + + vk::CommandBuffer command_buffer = 
scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + const vk::ClearValue clear_value = { + .color = clear_color + }; + + const vk::RenderPassBeginInfo begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = swapchain.GetFramebuffer(), + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + command_buffer.endRenderPass(); +} + +void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, + float x, float y, float w, float h) { + const ScreenInfo& screen_info_l = screen_infos[screen_id_l]; + const auto& texcoords = screen_info_l.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.top, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.right} + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info_l.texture.width); + const float height = static_cast(screen_info_l.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + vk::CommandBuffer command_buffer = 
scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, + float x, float y, float w, float h) { + const ScreenInfo& screen_info_l = screen_infos[screen_id_l]; + const auto& texcoords = screen_info_l.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), + }}; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info_l.texture.width); + const float height = static_cast(screen_info_l.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + + draw_info.o_resolution = Common::Vec4f{w, h, 1.0f / w, 1.0f / h}; + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawScreens(const 
Layout::FramebufferLayout& layout, bool flipped) { + if (VideoCore::g_renderer_bg_color_update_requested.exchange(false)) { + // Update background color before drawing + clear_color.float32[0] = Settings::values.bg_red; + clear_color.float32[1] = Settings::values.bg_green; + clear_color.float32[2] = Settings::values.bg_blue; + } + + if (VideoCore::g_renderer_sampler_update_requested.exchange(false)) { + // Set the new filtering mode for the sampler + ReloadSampler(); + } + + if (VideoCore::g_renderer_shader_update_requested.exchange(false)) { + ReloadPipeline(); + } + + const auto& top_screen = layout.top_screen; + const auto& bottom_screen = layout.bottom_screen; + + // Set projection matrix + //draw_info.modelview = + // MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height), flipped); + draw_info.modelview = glm::transpose(glm::ortho(0.f, static_cast(layout.width), + static_cast(layout.height), 0.0f, + 0.f, 1.f)); + + const bool stereo_single_screen = + Settings::values.render_3d == Settings::StereoRenderOption::Anaglyph || + Settings::values.render_3d == Settings::StereoRenderOption::Interlaced || + Settings::values.render_3d == Settings::StereoRenderOption::ReverseInterlaced; + + // Bind necessary state before drawing the screens + BeginRendering(); + + draw_info.layer = 0; + if (layout.top_screen_enabled) { + if (layout.is_rotated) { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreenRotated(0, top_screen.left, + top_screen.top, top_screen.GetWidth(), + top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreenRotated(0, (float)top_screen.left / 2, + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + (float)top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(1, + ((float)top_screen.left / 2) + ((float)layout.width / 2), + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + 
(float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreenRotated(0, layout.top_screen.left, + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(1, + layout.cardboard.top_screen_right_eye + + ((float)layout.width / 2), + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereoRotated(0, 1, (float)top_screen.left, (float)top_screen.top, + (float)top_screen.GetWidth(), (float)top_screen.GetHeight()); + } + } else { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreen(0, (float)top_screen.left, (float)top_screen.top, + (float)top_screen.GetWidth(), (float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreen(0, (float)top_screen.left / 2, (float)top_screen.top, + (float)top_screen.GetWidth() / 2, (float)top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(1, + ((float)top_screen.left / 2) + ((float)layout.width / 2), + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + (float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreen(0, layout.top_screen.left, layout.top_screen.top, + layout.top_screen.GetWidth(), layout.top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(1, + layout.cardboard.top_screen_right_eye + ((float)layout.width / 2), + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereo(0, 1, (float)top_screen.left, + (float)top_screen.top, (float)top_screen.GetWidth(), + (float)top_screen.GetHeight()); + } + } + } + + draw_info.layer = 0; + if (layout.bottom_screen_enabled) { + if 
(layout.is_rotated) { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreenRotated(2, (float)bottom_screen.left, + (float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreenRotated( + 2, (float)bottom_screen.left / 2, (float)bottom_screen.top, + (float)bottom_screen.GetWidth() / 2, (float)bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated( + 2, ((float)bottom_screen.left / 2) + ((float)layout.width / 2), + (float)bottom_screen.top, (float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreenRotated(2, layout.bottom_screen.left, + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(2, + layout.cardboard.bottom_screen_right_eye + + ((float)layout.width / 2), + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereoRotated(2, 2, (float)bottom_screen.left, (float)bottom_screen.top, + (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } + } else { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreen(2, (float)bottom_screen.left, + (float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreen(2, (float)bottom_screen.left / 2, + (float)bottom_screen.top, (float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(2, + ((float)bottom_screen.left / 2) + ((float)layout.width / 2), + (float)bottom_screen.top, 
(float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreen(2, layout.bottom_screen.left, + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(2, + layout.cardboard.bottom_screen_right_eye + + ((float)layout.width / 2), + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereo(2, 2, (float)bottom_screen.left, + (float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } + } + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.endRenderPass(); +} + +void RendererVulkan::SwapBuffers() { + const auto& layout = render_window.GetFramebufferLayout(); + PrepareRendertarget(); + + // Create swapchain if needed + if (swapchain.NeedsRecreation()) { + swapchain.Create(layout.width, layout.height, false); + } + + // Calling Submit will change the slot so get the required semaphores now + const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore(); + const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore(); + swapchain.AcquireNextImage(image_acquired); + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = static_cast(layout.width), + .height = static_cast(layout.height), + .minDepth = 0.0f, + .maxDepth = 1.0f + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {layout.width, layout.height} + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setViewport(0, viewport); + command_buffer.setScissor(0, scissor); + + for (auto& info : screen_infos) { + auto alloc = info.display_texture ? 
info.display_texture : &info.texture.alloc; + runtime.Transition(command_buffer, *alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, alloc->levels); + } + + DrawScreens(layout, false); + + // Flush all buffers to make the data visible to the GPU before submitting + rasterizer->FlushBuffers(); + vertex_buffer.Flush(); + + scheduler.Submit(SubmitMode::SwapchainSynced); + swapchain.Present(present_ready); + + // Inform texture runtime about the switch + runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex()); + + // When the command buffer switches, all state becomes undefined. + // This is problematic with dynamic states, so set all states here + if (instance.IsExtendedDynamicStateSupported()) { + rasterizer->SyncFixedState(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..8f4153311 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,126 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/common_types.h" +#include "common/math_util.h" +#include "core/hw/gpu.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Vulkan { + +/// Structure used for storing information about the textures for each 3DS screen +struct TextureInfo { + ImageAlloc alloc; + u32 width; + u32 height; + GPU::Regs::PixelFormat format; +}; + +/// Structure used for storing information about the display target for each 3DS screen +struct ScreenInfo { + ImageAlloc* display_texture = nullptr; + Common::Rectangle display_texcoords; + TextureInfo texture; + vk::Sampler sampler; +}; + +// Uniform data used for presenting the 3DS screens +struct PresentUniformData { + glm::mat4 modelview; + Common::Vec4f i_resolution; + Common::Vec4f o_resolution; + int screen_id_l = 0; + int screen_id_r = 0; + int layer = 0; + int reverse_interlaced = 0; + + // Returns an immutable byte view of the uniform data + auto AsBytes() const { + return std::as_bytes(std::span{this, 1}); + } +}; + +static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!"); + +constexpr u32 PRESENT_PIPELINES = 3; + +class RasterizerVulkan; + +class RendererVulkan : public RendererBase { +public: + RendererVulkan(Frontend::EmuWindow& window); + ~RendererVulkan() override; + + VideoCore::ResultStatus Init() override; + VideoCore::RasterizerInterface* Rasterizer() override; + void ShutDown() override; + void SwapBuffers() override; + void TryPresent(int timeout_ms) override {} + void PrepareVideoDumping() override {} + void CleanupVideoDumping() override {} + void Sync() override; + +private: + void ReloadSampler(); + void ReloadPipeline(); + void CompileShaders(); + 
void BuildLayouts(); + void BuildPipelines(); + void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer); + void ConfigureRenderPipeline(); + void PrepareRendertarget(); + void BeginRendering(); + + void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped); + void DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h); + void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h); + void DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h); + void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h); + + void UpdateFramerate(); + + /// Loads framebuffer from emulated memory into the display information structure + void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye); + +private: + Instance instance; + TaskScheduler scheduler; + RenderpassCache renderpass_cache; + TextureRuntime runtime; + Swapchain swapchain; + std::unique_ptr rasterizer; + StreamBuffer vertex_buffer; + + // Present pipelines (Normal, Anaglyph, Interlaced) + vk::PipelineLayout present_pipeline_layout; + vk::DescriptorSetLayout present_descriptor_layout; + vk::DescriptorUpdateTemplate present_update_template; + std::array present_pipelines; + std::array present_descriptor_sets; + std::array present_shaders; + std::array present_samplers; + vk::ShaderModule present_vertex_shader; + u32 current_pipeline = 0; + u32 current_sampler = 0; + + /// Display information for top and bottom screens respectively + std::array screen_infos{}; + PresentUniformData draw_info{}; + vk::ClearColorValue clear_color{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_common.cpp b/src/video_core/renderer_vulkan/vk_common.cpp new file mode 100644 index 000000000..f8377bc8d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_common.cpp @@ -0,0 +1,9 @@ +// 
Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VMA_IMPLEMENTATION +#include "video_core/renderer_vulkan/vk_common.h" + +// Store the dispatch loader here +VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h new file mode 100644 index 000000000..ff01bb2f2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -0,0 +1,72 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +// Include vulkan-hpp header +#define VK_NO_PROTOTYPES 1 +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 +#include + +// Include Vulkan memory allocator +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 +#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1 +#include + +namespace Vulkan { + +constexpr u32 SCHEDULER_COMMAND_COUNT = 4; + +/// Return the image aspect associated on the provided format +constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) { + switch (format) { + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD24UnormS8Uint: + case vk::Format::eX8D24UnormPack32: + case vk::Format::eD32SfloatS8Uint: + return vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth; + break; + case vk::Format::eD16Unorm: + case vk::Format::eD32Sfloat: + return vk::ImageAspectFlagBits::eDepth; + break; + default: + return vk::ImageAspectFlagBits::eColor; + } +} + +/// Returns a bit mask with the required usage of a format with a particular aspect +constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) { + auto usage = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc; + + if (aspect & vk::ImageAspectFlagBits::eDepth) { + return usage | 
vk::ImageUsageFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::ImageUsageFlagBits::eStorage | + vk::ImageUsageFlagBits::eColorAttachment; + } +} + +/// Returns a bit mask with the required features of a format with a particular aspect +constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) { + auto usage = vk::FormatFeatureFlagBits::eSampledImage | + vk::FormatFeatureFlagBits::eTransferDst | + vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst; + + if (aspect & vk::ImageAspectFlagBits::eDepth) { + return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment; + } else { + return usage | vk::FormatFeatureFlagBits::eStorageImage | + vk::FormatFeatureFlagBits::eColorAttachment; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp new file mode 100644 index 000000000..b8791dbab --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -0,0 +1,268 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/assert.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +Instance::Instance(Frontend::EmuWindow& window) { + auto window_info = window.GetWindowInfo(); + + // Fetch instance independant function pointers + vk::DynamicLoader dl; + auto vkGetInstanceProcAddr = dl.getProcAddress("vkGetInstanceProcAddr"); + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); + + // Enable the instance extensions the backend uses + auto extensions = GetInstanceExtensions(window_info.type, true); + + // We require a Vulkan 1.1 driver + const u32 available_version = vk::enumerateInstanceVersion(); + if (available_version < VK_API_VERSION_1_1) { + LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!"); + } + + const vk::ApplicationInfo application_info = { + .pApplicationName = "Citra", + .applicationVersion = VK_MAKE_VERSION(1, 0, 0), + .pEngineName = "Citra Vulkan", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = available_version + }; + + const std::array layers = {"VK_LAYER_KHRONOS_validation"}; + const vk::InstanceCreateInfo instance_info = { + .pApplicationInfo = &application_info, + .enabledLayerCount = static_cast(layers.size()), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = static_cast(extensions.size()), + .ppEnabledExtensionNames = extensions.data() + }; + + instance = vk::createInstance(instance_info); + surface = CreateSurface(instance, window); + + // TODO: GPU select dialog + auto physical_devices = instance.enumeratePhysicalDevices(); + physical_device = physical_devices[1]; + device_properties = physical_device.getProperties(); + + CreateDevice(); +} + +Instance::~Instance() { + device.waitIdle(); + vmaDestroyAllocator(allocator); + device.destroy(); + instance.destroySurfaceKHR(surface); + instance.destroy(); +} + +bool 
Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
    // NOTE(review): this cache is a function-local static, so it is shared by
    // every Instance, keyed only on format (not on physical device), and is
    // not synchronized. Fine for a single device queried from one renderer
    // thread -- confirm that assumption holds before reuse.
    static std::unordered_map supported;
    // Fast path: serve a previously queried format from the cache.
    if (auto it = supported.find(format); it != supported.end()) {
        return (it->second.optimalTilingFeatures & usage) == usage;
    }

    // Cache format properties so we don't have to query the driver all the time
    const vk::FormatProperties properties = physical_device.getFormatProperties(format);
    supported.insert(std::make_pair(format, properties));

    return (properties.optimalTilingFeatures & usage) == usage;
}

// Picks a substitute format when `format` lacks the features the renderer
// needs for its aspect (color vs depth), preferring same-block-size fallbacks.
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
    if (format == vk::Format::eUndefined) {
        return format;
    }

    vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
    if (IsFormatSupported(format, features)) {
        return format;
    }

    // Return the most supported alternative format preferably with the
    // same block size according to the Vulkan spec.
    // See 43.3. Required Format Support of the Vulkan spec
    switch (format) {
    case vk::Format::eD24UnormS8Uint:
        return vk::Format::eD32SfloatS8Uint;
    case vk::Format::eX8D24UnormPack32:
        return vk::Format::eD32Sfloat;
    case vk::Format::eR5G5B5A1UnormPack16:
        return vk::Format::eA1R5G5B5UnormPack16;
    case vk::Format::eR8G8B8Unorm:
        return vk::Format::eR8G8B8A8Unorm;
    case vk::Format::eUndefined:
        return vk::Format::eUndefined;
    case vk::Format::eR4G4B4A4UnormPack16:
        // B4G4R4A4 is not guaranteed by the spec to support attachments
        return GetFormatAlternative(vk::Format::eB4G4R4A4UnormPack16);
    default:
        LOG_WARNING(Render_Vulkan, "Format {} doesn't support attachments, falling back to RGBA8",
                    vk::to_string(format));
        return vk::Format::eR8G8B8A8Unorm;
    }
}

// Creates the logical device, opportunistically enabling optional extensions.
bool Instance::CreateDevice() {
    auto feature_chain = physical_device.getFeatures2();

    // Not having geometry shaders will cause issues with accelerated rendering.
+ const vk::PhysicalDeviceFeatures available = feature_chain.get().features; + if (!available.geometryShader) { + LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!"); + } + + auto extension_list = physical_device.enumerateDeviceExtensionProperties(); + if (extension_list.empty()) { + LOG_CRITICAL(Render_Vulkan, "No extensions supported by device."); + return false; + } + + // Helper lambda for adding extensions + std::array enabled_extensions; + u32 enabled_extension_count = 0; + + auto AddExtension = [&](std::string_view name) -> bool { + auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) { + return name.compare(prop.extensionName.data()); + }); + + if (result != extension_list.end()) { + LOG_INFO(Render_Vulkan, "Enabling extension: {}", name); + enabled_extensions[enabled_extension_count++] = name.data(); + return true; + } + + LOG_WARNING(Render_Vulkan, "Extension {} unavailable.", name); + return false; + }; + + AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); + extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + + // Search queue families for graphics and present queues + auto family_properties = physical_device.getQueueFamilyProperties(); + if (family_properties.empty()) { + LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues."); + return false; + } + + bool graphics_queue_found = false; + bool present_queue_found = false; + for (std::size_t i = 0; i < family_properties.size(); i++) { + // Check if queue supports graphics + const u32 index = static_cast(i); + if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) { + graphics_queue_family_index = index; + graphics_queue_found = true; + + // If this 
queue also supports presentation we are finished
            if (physical_device.getSurfaceSupportKHR(static_cast(i), surface)) {
                present_queue_family_index = index;
                present_queue_found = true;
                break;
            }
        }

        // Check if queue supports presentation
        if (physical_device.getSurfaceSupportKHR(index, surface)) {
            present_queue_family_index = index;
            present_queue_found = true;
        }
    }

    if (!graphics_queue_found || !present_queue_found) {
        LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
        return false;
    }

    static constexpr float queue_priorities[] = {1.0f};

    // One create-info per family; only the first entry is submitted when the
    // graphics and present families are the same (see queue_count below).
    const std::array queue_infos = {
        vk::DeviceQueueCreateInfo{
            .queueFamilyIndex = graphics_queue_family_index,
            .queueCount = 1,
            .pQueuePriorities = queue_priorities
        },
        vk::DeviceQueueCreateInfo{
            .queueFamilyIndex = present_queue_family_index,
            .queueCount = 1,
            .pQueuePriorities = queue_priorities
        }
    };

    const u32 queue_count = graphics_queue_family_index != present_queue_family_index ? 2u : 1u;
    const vk::StructureChain device_chain = {
        vk::DeviceCreateInfo{
            .queueCreateInfoCount = queue_count,
            .pQueueCreateInfos = queue_infos.data(),
            .enabledExtensionCount = enabled_extension_count,
            .ppEnabledExtensionNames = enabled_extensions.data(),
        },
        // Only features the renderer uses and the device actually reports are
        // requested, to keep vkCreateDevice from failing.
        vk::PhysicalDeviceFeatures2{
            .features = {
                .robustBufferAccess = available.robustBufferAccess,
                .geometryShader = available.geometryShader,
                .dualSrcBlend = available.dualSrcBlend,
                .logicOp = available.logicOp,
                .depthClamp = available.depthClamp,
                .largePoints = available.largePoints,
                .samplerAnisotropy = available.samplerAnisotropy,
                .fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
                .shaderStorageImageMultisample = available.shaderStorageImageMultisample,
                .shaderClipDistance = available.shaderClipDistance
            }
        },
        // NOTE(review): depthClipControl is requested unconditionally even
        // though the VK_EXT_depth_clip_control AddExtension() result was
        // discarded earlier; on drivers lacking the extension this will make
        // device creation fail. Confirm this struct is unlinked in that case.
        vk::PhysicalDeviceDepthClipControlFeaturesEXT{
            .depthClipControl = true
        },
        feature_chain.get(),
        feature_chain.get()
    };

    // Create logical device
    device = physical_device.createDevice(device_chain.get());
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device);

    // Grab the graphics and present queues.
+ graphics_queue = device.getQueue(graphics_queue_family_index, 0); + present_queue = device.getQueue(present_queue_family_index, 0); + + CreateAllocator(); + return true; +} + +void Instance::CreateAllocator() { + const VmaVulkanFunctions functions = { + .vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr, + .vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr + }; + + const VmaAllocatorCreateInfo allocator_info = { + .physicalDevice = physical_device, + .device = device, + .pVulkanFunctions = &functions, + .instance = instance, + .vulkanApiVersion = VK_API_VERSION_1_1 + }; + + if (VkResult result = vmaCreateAllocator(&allocator_info, &allocator); result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize VMA with error {}", result); + UNREACHABLE(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h new file mode 100644 index 000000000..3297e3a31 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -0,0 +1,129 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +/// The global Vulkan instance +class Instance { +public: + Instance(Frontend::EmuWindow& window); + ~Instance(); + + /// Returns true when the format supports the provided feature flags + bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const; + + /// Returns the most compatible format that supports the provided feature flags + vk::Format GetFormatAlternative(vk::Format format) const; + + /// Returns the Vulkan instance + vk::Instance GetInstance() const { + return instance; + } + + /// Returns the Vulkan surface + vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Returns the current physical device + vk::PhysicalDevice GetPhysicalDevice() const { + return physical_device; + } + + /// Returns the Vulkan device + vk::Device GetDevice() const { + return device; + } + + VmaAllocator GetAllocator() const { + return allocator; + } + + /// Retrieve queue information + u32 GetGraphicsQueueFamilyIndex() const { + return graphics_queue_family_index; + } + + u32 GetPresentQueueFamilyIndex() const { + return present_queue_family_index; + } + + vk::Queue GetGraphicsQueue() const { + return graphics_queue; + } + + vk::Queue GetPresentQueue() const { + return present_queue; + } + + /// Returns true when VK_KHR_timeline_semaphore is supported + bool IsTimelineSemaphoreSupported() const { + return timeline_semaphores; + } + + /// Returns true when VK_EXT_extended_dynamic_state is supported + bool IsExtendedDynamicStateSupported() const { + return extended_dynamic_state; + } + + /// Returns true when VK_KHR_push_descriptors is supported + bool IsPushDescriptorsSupported() const { + return push_descriptors; + } + + /// Returns the vendor ID of the physical device + u32 GetVendorID() const { + return device_properties.vendorID; + } + + /// Returns the device ID of 
the physical device + u32 GetDeviceID() const { + return device_properties.deviceID; + } + + /// Returns the pipeline cache unique identifier + const auto GetPipelineCacheUUID() const { + return device_properties.pipelineCacheUUID; + } + + /// Returns the minimum required alignment for uniforms + vk::DeviceSize UniformMinAlignment() const { + return device_properties.limits.minUniformBufferOffsetAlignment; + } + +private: + /// Creates the logical device opportunistically enabling extensions + bool CreateDevice(); + + /// Creates the VMA allocator handle + void CreateAllocator(); + +private: + vk::Device device; + vk::PhysicalDevice physical_device; + vk::Instance instance; + vk::SurfaceKHR surface; + vk::PhysicalDeviceProperties device_properties; + VmaAllocator allocator; + vk::Queue present_queue; + vk::Queue graphics_queue; + u32 present_queue_family_index = 0; + u32 graphics_queue_family_index = 0; + + bool timeline_semaphores = false; + bool extended_dynamic_state = false; + bool push_descriptors = false; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 000000000..69da9797b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -0,0 +1,714 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/common_paths.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" + +namespace Vulkan { + +struct Bindings { + std::array bindings; + u32 binding_count; +}; + +constexpr u32 RASTERIZER_SET_COUNT = 4; +constexpr static std::array RASTERIZER_SETS = { + Bindings{ + // Utility set + .bindings = { + vk::DescriptorType::eUniformBuffer, + vk::DescriptorType::eUniformBuffer, + vk::DescriptorType::eUniformTexelBuffer, + vk::DescriptorType::eUniformTexelBuffer, + vk::DescriptorType::eUniformTexelBuffer + }, + .binding_count = 5 + }, + Bindings{ + // Texture set + .bindings = { + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage, + vk::DescriptorType::eSampledImage + }, + .binding_count = 4 + }, + Bindings{ + // Sampler set + .bindings = { + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler, + vk::DescriptorType::eSampler + }, + .binding_count = 4 + }, + Bindings { + // Shadow set + .bindings = { + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage, + vk::DescriptorType::eStorageImage + }, + .binding_count = 7 + } +}; + +constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) { + vk::ShaderStageFlags flags; + switch (type) { + case vk::DescriptorType::eSampler: + case vk::DescriptorType::eSampledImage: + case vk::DescriptorType::eUniformTexelBuffer: + case vk::DescriptorType::eStorageImage: + flags = vk::ShaderStageFlagBits::eFragment; + 
break; + case vk::DescriptorType::eUniformBuffer: + case vk::DescriptorType::eUniformBufferDynamic: + flags = vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eCompute; + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown descriptor type!"); + } + + return flags; +} + +u32 AttribBytes(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + return sizeof(float) * attrib.size; + case AttribType::Int: + return sizeof(u32) * attrib.size; + case AttribType::Short: + return sizeof(u16) * attrib.size; + case AttribType::Byte: + case AttribType::Ubyte: + return sizeof(u8) * attrib.size; + } +} + +vk::Format ToVkAttributeFormat(VertexAttribute attrib) { + switch (attrib.type) { + case AttribType::Float: + switch (attrib.size) { + case 1: return vk::Format::eR32Sfloat; + case 2: return vk::Format::eR32G32Sfloat; + case 3: return vk::Format::eR32G32B32Sfloat; + case 4: return vk::Format::eR32G32B32A32Sfloat; + } + default: + LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!"); + UNREACHABLE(); + } + + return vk::Format::eR32Sfloat; +} + +vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { + switch (index) { + case 0: return vk::ShaderStageFlagBits::eVertex; + case 1: return vk::ShaderStageFlagBits::eFragment; + case 2: return vk::ShaderStageFlagBits::eGeometry; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!"); + UNREACHABLE(); + } + + return vk::ShaderStageFlagBits::eVertex; +} + +PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { + descriptor_dirty.fill(true); + + LoadDiskCache(); + BuildLayout(); + trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex, + instance.GetDevice(), ShaderOptimization::Debug); +} + 
+PipelineCache::~PipelineCache() { + vk::Device device = instance.GetDevice(); + + SaveDiskCache(); + + device.destroyPipelineLayout(layout); + device.destroyPipelineCache(pipeline_cache); + device.destroyShaderModule(trivial_vertex_shader); + for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) { + device.destroyDescriptorSetLayout(descriptor_set_layouts[i]); + device.destroyDescriptorUpdateTemplate(update_templates[i]); + } + + for (auto& [key, module] : programmable_vertex_shaders.shader_cache) { + device.destroyShaderModule(module); + } + + for (auto& [key, module] : fixed_geometry_shaders.shaders) { + device.destroyShaderModule(module); + } + + for (auto& [key, module] : fragment_shaders.shaders) { + device.destroyShaderModule(module); + } + + for (const auto& [hash, pipeline] : graphics_pipelines) { + device.destroyPipeline(pipeline); + } + + graphics_pipelines.clear(); +} + +void PipelineCache::BindPipeline(const PipelineInfo& info) { + ApplyDynamic(info); + + // When texture downloads occur the runtime will flush the GPU and cause + // a scheduler slot switch behind our back. This might invalidate any + // cached descriptor sets/require pipeline rebinding. + if (timestamp != scheduler.GetHostFenceCounter()) { + MarkDirty(); + } + + u64 shader_hash = 0; + for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { + shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); + } + + const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() ? 
+ offsetof(PipelineInfo, rasterization) : + offsetof(PipelineInfo, depth_stencil) + offsetof(DepthStencilState, stencil_reference); + + u64 info_hash = Common::ComputeHash64(&info, info_hash_size); + u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); + + auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{}); + if (new_pipeline) { + it->second = BuildPipeline(info); + } + + if (it->second != current_pipeline) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second); + current_pipeline = it->second; + } + + BindDescriptorSets(); +} + +bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) { + const PicaVSConfig config{regs.vs, setup}; + auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex, + instance.GetDevice(), ShaderOptimization::Debug); + if (!handle) { + return false; + } + + current_shaders[ProgramType::VS] = handle; + shader_hashes[ProgramType::VS] = config.Hash(); + return true; +} + +void PipelineCache::UseTrivialVertexShader() { + current_shaders[ProgramType::VS] = trivial_vertex_shader; + shader_hashes[ProgramType::VS] = 0; +} + +void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { + const PicaFixedGSConfig gs_config{regs}; + auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, + instance.GetDevice(), ShaderOptimization::Debug); + current_shaders[ProgramType::GS] = handle; + shader_hashes[ProgramType::GS] = gs_config.Hash(); +} + +void PipelineCache::UseTrivialGeometryShader() { + current_shaders[ProgramType::GS] = VK_NULL_HANDLE; + shader_hashes[ProgramType::GS] = 0; +} + +void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { + const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); + auto [handle, result] = fragment_shaders.Get(config, 
vk::ShaderStageFlagBits::eFragment, + instance.GetDevice(), ShaderOptimization::Debug); + current_shaders[ProgramType::FS] = handle; + shader_hashes[ProgramType::FS] = config.Hash(); +} + +void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }; + + SetBinding(1, binding, DescriptorData{image_info}); +} + +void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral + }; + + SetBinding(3, binding, DescriptorData{image_info}); +} + +void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { + const DescriptorData data = { + .buffer_info = vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size + } + }; + + SetBinding(0, binding, data); +} + +void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { + const DescriptorData data = { + .buffer_view = buffer_view + }; + + SetBinding(0, binding, data); +} + +void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) { + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .sampler = sampler + } + }; + + SetBinding(2, binding, data); +} + +void PipelineCache::SetViewport(float x, float y, float width, float height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f}); +} + +void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}}); +} + +void PipelineCache::MarkDirty() { + descriptor_dirty.fill(true); + current_pipeline = VK_NULL_HANDLE; + timestamp = scheduler.GetHostFenceCounter(); +} + +void 
PipelineCache::ApplyDynamic(const PipelineInfo& info) { + if (instance.IsExtendedDynamicStateSupported()) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(info.rasterization.topology)); + } +} + +void PipelineCache::SetBinding(u32 set, u32 binding, DescriptorData data) { + if (update_data[set][binding] != data) { + update_data[set][binding] = data; + descriptor_dirty[set] = true; + } +} + +void PipelineCache::BuildLayout() { + std::array set_bindings; + std::array update_entries; + + vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { + const auto& set = RASTERIZER_SETS[i]; + for (u32 j = 0; j < set.binding_count; j++) { + vk::DescriptorType type = set.bindings[j]; + set_bindings[j] = vk::DescriptorSetLayoutBinding{ + .binding = j, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = ToVkStageFlags(type) + }; + + update_entries[j] = vk::DescriptorUpdateTemplateEntry{ + .dstBinding = j, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = j * sizeof(DescriptorData), + .stride = 0 + }; + } + + const vk::DescriptorSetLayoutCreateInfo layout_info = { + .bindingCount = set.binding_count, + .pBindings = set_bindings.data() + }; + + // Create descriptor set layout + descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info); + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = set.binding_count, + .pDescriptorUpdateEntries = update_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = descriptor_set_layouts[i] + }; + + // Create descriptor set update template + update_templates[i] = device.createDescriptorUpdateTemplate(template_info); + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = RASTERIZER_SET_COUNT, + .pSetLayouts = 
descriptor_set_layouts.data(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr + }; + + layout = device.createPipelineLayout(layout_info); +} + +vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { + vk::Device device = instance.GetDevice(); + + u32 shader_count = 0; + std::array shader_stages; + for (std::size_t i = 0; i < current_shaders.size(); i++) { + vk::ShaderModule shader = current_shaders[i]; + if (!shader) { + continue; + } + + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + .stage = ToVkShaderStage(i), + .module = shader, + .pName = "main" + }; + } + + /** + * Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and increasing + * data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate. + * Since one instance is all we render, the shader will always read the single attribute. + */ + std::array bindings; + for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { + const auto& binding = info.vertex_layout.bindings[i]; + bindings[i] = vk::VertexInputBindingDescription{ + .binding = binding.binding, + .stride = binding.stride, + .inputRate = binding.fixed.Value() ? 
vk::VertexInputRate::eInstance + : vk::VertexInputRate::eVertex + }; + } + + // Populate vertex attribute structures + std::array attributes; + for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { + const auto& attr = info.vertex_layout.attributes[i]; + attributes[i] = vk::VertexInputAttributeDescription{ + .location = attr.location, + .binding = attr.binding, + .format = ToVkAttributeFormat(attr), + .offset = attr.offset + }; + } + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = info.vertex_layout.binding_count, + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, + .pVertexAttributeDescriptions = attributes.data() + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology), + .primitiveRestartEnable = false + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode), + .frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode), + .depthBiasEnable = false, + .lineWidth = 1.0f + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = info.blending.blend_enable.Value(), + .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor), + .dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor), + .colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq), + .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor), + .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor), + .alphaBlendOp = 
PicaToVK::BlendEquation(info.blending.alpha_blend_eq), + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = info.blending.logic_op_enable.Value(), + .logicOp = PicaToVK::LogicOp(info.blending.logic_op), + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f} + }; + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = 1.0f, + .height = 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {1, 1} + }; + + vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = { + .negativeOneToOne = true + }; + + const vk::PipelineViewportStateCreateInfo viewport_info = { + .pNext = &depth_clip_control, + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported(); + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + vk::DynamicState::eStencilCompareMask, + vk::DynamicState::eStencilWriteMask, + vk::DynamicState::eStencilReference, + vk::DynamicState::eBlendConstants, + // VK_EXT_extended_dynamic_state + vk::DynamicState::eCullModeEXT, + vk::DynamicState::eDepthCompareOpEXT, + vk::DynamicState::eDepthTestEnableEXT, + vk::DynamicState::eDepthWriteEnableEXT, + vk::DynamicState::eFrontFaceEXT, + vk::DynamicState::ePrimitiveTopologyEXT, + vk::DynamicState::eStencilOpEXT, + vk::DynamicState::eStencilTestEnableEXT, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = + extended_dynamic_states ? 
static_cast(dynamic_states.size()) : 6u, + .pDynamicStates = dynamic_states.data() + }; + + const vk::StencilOpState stencil_op_state = { + .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op) + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = static_cast(info.depth_stencil.depth_test_enable.Value()), + .depthWriteEnable = static_cast(info.depth_stencil.depth_write_enable.Value()), + .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op), + .depthBoundsTestEnable = false, + .stencilTestEnable = static_cast(info.depth_stencil.stencil_test_enable.Value()), + .front = stencil_op_state, + .back = stencil_op_state + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = shader_count, + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = layout, + .renderPass = renderpass_cache.GetRenderpass(info.color_attachment, + info.depth_attachment, false) + }; + + if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!"); + UNREACHABLE(); + } + + return VK_NULL_HANDLE; +} + +static_assert(sizeof(vk::DescriptorBufferInfo) == sizeof(VkDescriptorBufferInfo)); + +void PipelineCache::BindDescriptorSets() { + vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { + 
if (descriptor_dirty[i] || !descriptor_sets[i]) { + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = scheduler.GetDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = &descriptor_set_layouts[i] + }; + + vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; + device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]); + + descriptor_sets[i] = set; + descriptor_dirty[i] = false; + } + } + + // Bind the descriptor sets + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, RASTERIZER_SET_COUNT, + descriptor_sets.data(), 0, nullptr); +} + +void PipelineCache::LoadDiskCache() { + if (!EnsureDirectories()) { + return; + } + + const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin"; + vk::PipelineCacheCreateInfo cache_info = { + .initialDataSize = 0, + .pInitialData = nullptr + }; + + FileUtil::IOFile cache_file{cache_file_path, "r"}; + if (cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Loading pipeline cache"); + + const u32 cache_file_size = cache_file.GetSize(); + auto cache_data = std::vector(cache_file_size); + if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) { + if (!IsCacheValid(cache_data.data(), cache_file_size)) { + LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid"); + } else { + cache_info.initialDataSize = cache_file_size; + cache_info.pInitialData = cache_data.data(); + } + } + + cache_file.Close(); + } + + vk::Device device = instance.GetDevice(); + pipeline_cache = device.createPipelineCache(cache_info); +} + +void PipelineCache::SaveDiskCache() { + if (!EnsureDirectories()) { + return; + } + + const std::string cache_file_path = GetPipelineCacheDir() + DIR_SEP "pipelines.bin"; + FileUtil::IOFile cache_file{cache_file_path, "wb"}; + if (!cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing"); + 
return; + } + + vk::Device device = instance.GetDevice(); + auto cache_data = device.getPipelineCacheData(pipeline_cache); + if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) { + LOG_WARNING(Render_Vulkan, "Error during pipeline cache write"); + return; + } + + cache_file.Close(); +} + +bool PipelineCache::IsCacheValid(const u8* data, u32 size) const { + if (size < sizeof(vk::PipelineCacheHeaderVersionOne)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header"); + return false; + } + + vk::PipelineCacheHeaderVersionOne header; + std::memcpy(&header, data, sizeof(header)); + if (header.headerSize < sizeof(header)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version"); + return false; + } + + if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) { + LOG_ERROR(Render_Vulkan, + "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})", + header.vendorID, vendor_id); + return false; + } + + if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) { + LOG_ERROR(Render_Vulkan, + "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})", + header.deviceID, device_id); + return false; + } + + if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +bool PipelineCache::EnsureDirectories() const { + const auto CreateDir = [](const std::string& dir) { + if (!FileUtil::CreateDir(dir)) { + LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir); + return false; + } + + return true; + }; + + return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && + CreateDir(GetPipelineCacheDir()); +} 
+ +std::string PipelineCache::GetPipelineCacheDir() const { + return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan"; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h new file mode 100644 index 000000000..46cd21be8 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -0,0 +1,268 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/bit_field.h" +#include "common/hash.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/shader/shader_cache.h" +#include "video_core/regs.h" + +namespace Vulkan { + +constexpr u32 MAX_SHADER_STAGES = 3; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_BINDINGS = 16; +constexpr u32 MAX_DESCRIPTORS = 8; +constexpr u32 MAX_DESCRIPTOR_SETS = 6; + +enum class AttribType : u32 { + Float = 0, + Int = 1, + Short = 2, + Byte = 3, + Ubyte = 4 +}; + +/** + * The pipeline state is tightly packed with bitfields to reduce + * the overhead of hashing as much as possible + */ +union RasterizationState { + u8 value = 0; + BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology; + BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode; +}; + +struct DepthStencilState { + union { + u32 value = 0; + BitField<0, 1, u32> depth_test_enable; + BitField<1, 1, u32> depth_write_enable; + BitField<2, 1, u32> stencil_test_enable; + BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op; + BitField<12, 3, Pica::FramebufferRegs::StencilAction> 
stencil_depth_fail_op; + BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op; + }; + + // These are dynamic state so keep them separate + u8 stencil_reference; + u8 stencil_compare_mask; + u8 stencil_write_mask; +}; + +union BlendingState { + u32 value = 0; + BitField<0, 1, u32> blend_enable; + BitField<1, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor; + BitField<5, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor; + BitField<9, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq; + BitField<12, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor; + BitField<16, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor; + BitField<20, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq; + BitField<23, 4, u32> color_write_mask; + BitField<27, 1, u32> logic_op_enable; + BitField<28, 4, Pica::FramebufferRegs::LogicOp> logic_op; +}; + +union VertexBinding { + u16 value = 0; + BitField<0, 4, u16> binding; + BitField<4, 1, u16> fixed; + BitField<5, 11, u16> stride; +}; + +union VertexAttribute { + u32 value = 0; + BitField<0, 4, u32> binding; + BitField<4, 4, u32> location; + BitField<8, 3, AttribType> type; + BitField<11, 3, u32> size; + BitField<14, 11, u32> offset; +}; + +struct VertexLayout { + u8 binding_count; + u8 attribute_count; + std::array bindings; + std::array attributes; +}; + +/** + * Information about a graphics/compute pipeline + */ +struct PipelineInfo { + VertexLayout vertex_layout{}; + BlendingState blending{}; + VideoCore::PixelFormat color_attachment = VideoCore::PixelFormat::RGBA8; + VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8; + RasterizationState rasterization{}; + DepthStencilState depth_stencil{}; + + bool IsDepthWriteEnabled() const { + const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8; + const bool depth_write = + depth_stencil.depth_test_enable && depth_stencil.depth_write_enable; + const bool stencil_write = + 
has_stencil && depth_stencil.stencil_test_enable && depth_stencil.stencil_write_mask != 0; + + return depth_write || stencil_write; + } +}; + +union DescriptorData { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + + bool operator!=(const DescriptorData& other) const { + return std::memcmp(this, &other, sizeof(DescriptorData)) != 0; + } +}; + +using DescriptorSetData = std::array; + +/** + * Vulkan specialized PICA shader caches + */ +using ProgrammableVertexShaders = + Pica::Shader::ShaderDoubleCache; + +using FixedGeometryShaders = + Pica::Shader::ShaderCache; + +using FragmentShaders = + Pica::Shader::ShaderCache; + + +class Instance; +class TaskScheduler; +class RenderpassCache; + +/** + * Stores a collection of rasterizer pipelines used during rendering. + * In addition handles descriptor set management. + */ +class PipelineCache { +public: + PipelineCache(const Instance& instance, TaskScheduler& scheduler, RenderpassCache& renderpass_cache); + ~PipelineCache(); + + /// Binds a pipeline using the provided information + void BindPipeline(const PipelineInfo& info); + + /// Binds a PICA decompiled vertex shader + bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup); + + /// Binds a passthrough vertex shader + void UseTrivialVertexShader(); + + /// Binds a PICA decompiled geometry shader + void UseFixedGeometryShader(const Pica::Regs& regs); + + /// Binds a passthrough geometry shader + void UseTrivialGeometryShader(); + + /// Binds a fragment shader generated from PICA state + void UseFragmentShader(const Pica::Regs& regs); + + /// Binds a texture to the specified binding + void BindTexture(u32 binding, vk::ImageView image_view); + + /// Binds a storage image to the specified binding + void BindStorageImage(u32 binding, vk::ImageView image_view); + + /// Binds a buffer to the specified binding + void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size); 
+ + /// Binds a buffer to the specified binding + void BindTexelBuffer(u32 binding, vk::BufferView buffer_view); + + /// Binds a sampler to the specified binding + void BindSampler(u32 binding, vk::Sampler sampler); + + /// Sets the viewport rectangle to the provided values + void SetViewport(float x, float y, float width, float height); + + /// Sets the scissor rectangle to the provided values + void SetScissor(s32 x, s32 y, u32 width, u32 height); + + /// Marks all cached pipeline cache state as dirty + void MarkDirty(); + +private: + /// Binds a resource to the provided binding + void SetBinding(u32 set, u32 binding, DescriptorData data); + + /// Applies dynamic pipeline state to the current command buffer + void ApplyDynamic(const PipelineInfo& info); + + /// Builds the rasterizer pipeline layout + void BuildLayout(); + + /// Builds a rasterizer pipeline using the PipelineInfo struct + vk::Pipeline BuildPipeline(const PipelineInfo& info); + + /// Builds descriptor sets that reference the currently bound resources + void BindDescriptorSets(); + + /// Loads the pipeline cache stored to disk + void LoadDiskCache(); + + /// Stores the generated pipeline cache to disk + void SaveDiskCache(); + + /// Returns true when the disk data can be used by the current driver + bool IsCacheValid(const u8* data, u32 size) const; + + /// Create shader disk cache directories. Returns true on success. 
+ bool EnsureDirectories() const; + + /// Returns the pipeline cache storage dir + std::string GetPipelineCacheDir() const; + +private: + const Instance& instance; + TaskScheduler& scheduler; + RenderpassCache& renderpass_cache; + + // Cached pipelines + vk::PipelineCache pipeline_cache; + std::unordered_map> graphics_pipelines; + vk::Pipeline current_pipeline{}; + + // Cached layouts for the rasterizer pipelines + vk::PipelineLayout layout; + std::array descriptor_set_layouts; + std::array update_templates; + + // Current data for the descriptor sets + std::array update_data{}; + std::array descriptor_dirty{}; + std::array descriptor_sets; + u64 timestamp = 0; + + // Bound shader modules + enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1 + }; + + std::array current_shaders; + std::array shader_hashes; + ProgrammableVertexShaders programmable_vertex_shaders; + FixedGeometryShaders fixed_geometry_shaders; + FragmentShaders fragment_shaders; + vk::ShaderModule trivial_vertex_shader; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp new file mode 100644 index 000000000..9a29107c5 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -0,0 +1,131 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +// Include the vulkan platform specific header +#if defined(ANDROID) || defined (__ANDROID__) + #define VK_USE_PLATFORM_ANDROID_KHR +#elif defined(_WIN32) + #define VK_USE_PLATFORM_WIN32_KHR +#elif defined(__APPLE__) + #define VK_USE_PLATFORM_MACOS_MVK + #define VK_USE_PLATFORM_METAL_EXT +#else + #define VK_USE_PLATFORM_WAYLAND_KHR + #define VK_USE_PLATFORM_XLIB_KHR +#endif + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_platform.h" + +namespace Vulkan { + +vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window) { + const auto& window_info = emu_window.GetWindowInfo(); + vk::SurfaceKHR surface{}; + + // Perform instance function loading here, to also load window system functions + VULKAN_HPP_DEFAULT_DISPATCHER.init(instance); + +#if defined(VK_USE_PLATFORM_WIN32_KHR) + if (window_info.type == Frontend::WindowSystemType::Windows) { + const vk::Win32SurfaceCreateInfoKHR win32_ci = { + .hinstance = nullptr, + .hwnd = static_cast(window_info.render_surface) + }; + + if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface"); + UNREACHABLE(); + } + } +#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR) + if (window_info.type == Frontend::WindowSystemType::X11) { + const vk::XlibSurfaceCreateInfoKHR xlib_ci = { + .dpy = static_cast(window_info.display_connection), + .window = reinterpret_cast(window_info.render_surface) + }; + + if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + UNREACHABLE(); + } + } + + if (window_info.type == Frontend::WindowSystemType::Wayland) { + const vk::WaylandSurfaceCreateInfoKHR wayland_ci = { + .display = 
static_cast(window_info.display_connection), + .surface = static_cast(window_info.render_surface) + }; + + if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + UNREACHABLE(); + } + } +#endif + + if (!surface) { + LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform"); + } + + return surface; +} + +std::vector GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) { + const auto properties = vk::enumerateInstanceExtensionProperties(); + if (properties.empty()) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return std::vector{}; + } + + // Add the windowing system specific extension + std::vector extensions; + extensions.reserve(6); + + switch (window_type) { + case Frontend::WindowSystemType::Headless: + break; +#if defined(VK_USE_PLATFORM_WIN32_KHR) + case Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#elif defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_WAYLAND_KHR) + case Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + + if (window_type != Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + + if (enable_debug_utils) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + for (const char* extension : extensions) { + const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) { + return std::strcmp(extension, prop.extensionName) == 0; + }); + + if (iter == properties.end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not 
available", extension); + return std::vector{}; + } + } + + return extensions; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h new file mode 100644 index 000000000..c588909f6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -0,0 +1,22 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Frontend { +class EmuWindow; +enum class WindowSystemType : u8; +} + +namespace Vulkan { + +std::vector GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils); + +vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..d958de7f6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,2153 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/video_core.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); +MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); + +RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { + position[0] = v.pos.x.ToFloat32(); + position[1] = v.pos.y.ToFloat32(); + position[2] = v.pos.z.ToFloat32(); + position[3] = v.pos.w.ToFloat32(); + color[0] = v.color.x.ToFloat32(); + color[1] = v.color.y.ToFloat32(); + color[2] = v.color.z.ToFloat32(); + color[3] = v.color.w.ToFloat32(); + tex_coord0[0] = v.tc0.x.ToFloat32(); + tex_coord0[1] = v.tc0.y.ToFloat32(); + tex_coord1[0] = v.tc1.x.ToFloat32(); + tex_coord1[1] = v.tc1.y.ToFloat32(); + tex_coord2[0] = v.tc2.x.ToFloat32(); + tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = 
v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + normquat = -normquat; + } +} + +/** + * This maps to the following layout in GLSL code: + * layout(location = 0) in vec4 vert_position; + * layout(location = 1) in vec4 vert_color; + * layout(location = 2) in vec2 vert_texcoord0; + * layout(location = 3) in vec2 vert_texcoord1; + * layout(location = 4) in vec2 vert_texcoord2; + * layout(location = 5) in float vert_texcoord0_w; + * layout(location = 6) in vec4 vert_normquat; + * layout(location = 7) in vec3 vert_view; + */ +constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() { + VertexLayout layout{}; + layout.attribute_count = 8; + layout.binding_count = 1; + + // Define binding + layout.bindings[0].binding.Assign(0); + layout.bindings[0].fixed.Assign(0); + layout.bindings[0].stride.Assign(sizeof(HardwareVertex)); + + // Define attributes + constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; + u32 offset = 0; + + for (u32 loc = 0; loc < 8; loc++) { + VertexAttribute& attribute = layout.attributes[loc]; + attribute.binding.Assign(0); + attribute.location.Assign(loc); + attribute.offset.Assign(offset); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(sizes[loc]); + offset += sizes[loc] * sizeof(float); + } + + return layout; +} + +constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr u32 INDEX_BUFFER_SIZE = 2 * 1024 * 1024; +constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; +constexpr u32 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; + +constexpr std::array TEXTURE_BUFFER_LF_FORMATS = { + vk::Format::eR32G32Sfloat +}; + +constexpr std::array TEXTURE_BUFFER_FORMATS = { + vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32A32Sfloat +}; + +RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, + TaskScheduler& scheduler, TextureRuntime& runtime, + RenderpassCache& renderpass_cache) + : instance{instance}, 
scheduler{scheduler}, runtime{runtime}, renderpass_cache{renderpass_cache}, + res_cache{*this, runtime}, pipeline_cache{instance, scheduler, renderpass_cache}, + vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, + uniform_buffer{instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}}, + index_buffer{instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}}, + texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TEXTURE_BUFFER_FORMATS}, + texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TEXTURE_BUFFER_LF_FORMATS} { + + // Create a 1x1 clear texture to use in the NULL case, + default_texture = runtime.Allocate(1, 1, VideoCore::PixelFormat::RGBA8, + VideoCore::TextureType::Texture2D); + runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture, + vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + + uniform_block_data.lighting_lut_dirty.fill(true); + + uniform_buffer_alignment = instance.UniformMinAlignment(); + uniform_size_aligned_vs = + Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = + Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + + // Define vertex layout for software shaders + pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout(); + + const SamplerInfo default_sampler_info = { + .mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .min_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .mip_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .wrap_s = Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder, + .wrap_t = Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder + }; + + default_sampler = CreateSampler(default_sampler_info); + + // Since we don't have access 
to VK_EXT_descriptor_indexing we need to initialize + // all descriptor sets even the ones we don't use. Use default_texture for this + const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData); + const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData); + pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size); + pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size); + pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView()); + pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0)); + pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1)); + + for (u32 i = 0; i < 4; i++) { + pipeline_cache.BindTexture(i, default_texture.image_view); + pipeline_cache.BindSampler(i, default_sampler); + } + + for (u32 i = 0; i < 7; i++) { + pipeline_cache.BindStorageImage(i, default_texture.image_view); + } + + // Explicitly call the derived version to avoid warnings about calling virtual + // methods in the constructor + RasterizerVulkan::SyncEntireState(); +} + +RasterizerVulkan::~RasterizerVulkan() { + renderpass_cache.ExitRenderpass(); + scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown); + + VmaAllocator allocator = instance.GetAllocator(); + vk::Device device = instance.GetDevice(); + + for (auto& [key, sampler] : samplers) { + device.destroySampler(sampler); + } + + for (auto& [key, framebuffer] : framebuffers) { + device.destroyFramebuffer(framebuffer); + } + + vmaDestroyImage(allocator, default_texture.image, default_texture.allocation); + device.destroyImageView(default_texture.image_view); + device.destroySampler(default_sampler); +} + +void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + //shader_program_manager->LoadDiskCache(stop_loading, callback); +} + +void RasterizerVulkan::SyncEntireState() { + // Sync fixed function Vulkan state + SyncFixedState(); + + // Sync uniforms + SyncClipCoef(); 
+ SyncDepthScale(); + SyncDepthOffset(); + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages(); + for (std::size_t index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (unsigned light_index = 0; light_index < 8; light_index++) { + SyncLightSpecular0(light_index); + SyncLightSpecular1(light_index); + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + SyncLightDistanceAttenuationBias(light_index); + SyncLightDistanceAttenuationScale(light_index); + } + + SyncFogColor(); + SyncProcTexNoise(); + SyncProcTexBias(); + SyncShadowBias(); + SyncShadowTextureBias(); +} + +void RasterizerVulkan::SyncFixedState() { + SyncClipEnabled(); + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); + SyncLogicOp(); + SyncStencilTest(); + SyncDepthTest(); + SyncColorWriteMask(); + SyncStencilWriteMask(); + SyncDepthWriteMask(); +} + +/** + * This is a helper function to resolve an issue when interpolating opposite quaternions. See below + * for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(Q1, -Q2) positive. + * + * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is + * correct for most cases but can still rotate around the long way sometimes. 
An implementation + * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check + * between each step would work for those cases at the cost of being more complex to implement. + * + * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around + * these issues, making this basic implementation actually more accurate to the hardware. + */ +static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { + Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; + Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; + + return (Common::Dot(a, b) < 0.f); +} + +void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) { + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); +} + +static constexpr std::array vs_attrib_types = { + AttribType::Byte, // VertexAttributeFormat::BYTE + AttribType::Ubyte, // VertexAttributeFormat::UBYTE + AttribType::Short, // VertexAttributeFormat::SHORT + AttribType::Float // VertexAttributeFormat::FLOAT +}; + +struct VertexArrayInfo { + u32 vs_input_index_min; + u32 vs_input_index_max; + u32 vs_input_size; +}; + +RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + + u32 vertex_min; + u32 vertex_max; + if (is_indexed) { + const auto& index_info = regs.pipeline.index_array; + const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; + const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); + const u16* index_address_16 = reinterpret_cast(index_address_8); + const bool index_u16 = 
index_info.format != 0; + + vertex_min = 0xFFFF; + vertex_max = 0; + const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + res_cache.FlushRegion(address, size, nullptr); + for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { + const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; + vertex_min = std::min(vertex_min, vertex); + vertex_max = std::max(vertex_max, vertex); + } + } else { + vertex_min = regs.pipeline.vertex_offset; + vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; + } + + const u32 vertex_num = vertex_max - vertex_min + 1; + u32 vs_input_size = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count != 0) { + vs_input_size += loader.byte_count * vertex_num; + } + } + + return {vertex_min, vertex_max, vs_input_size}; +} + +void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max) { + auto [array_ptr, array_offset, _] = vertex_buffer.Map(vs_input_size, 4); + + /** + * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU + * how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base + * address containing the vertex array data. The data for each attribute loader (i) can be found + * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought of + * as something analogous to Vulkan bindings. The user can store attributes in separate loaders + * or interleave them in the same loader. 
+ */ + const auto& regs = Pica::g_state.regs; + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE + + VertexLayout layout{}; + std::array enable_attributes{}; + std::array binding_offsets{}; + + u32 buffer_offset = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count == 0 || loader.byte_count == 0) { + continue; + } + + // Analyze the attribute loader by checking which attributes it provides + u32 offset = 0; + for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) { + u32 attribute_index = loader.GetComponent(comp); + if (attribute_index < 12) { + if (u32 size = vertex_attributes.GetNumElements(attribute_index); size != 0) { + offset = Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); + + const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); + const u32 attrib_format = static_cast(vertex_attributes.GetFormat(attribute_index)); + const AttribType type = vs_attrib_types[attrib_format]; + + // Define the attribute + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(input_reg); + attribute.offset.Assign(offset); + attribute.type.Assign(type); + attribute.size.Assign(size); + + enable_attributes[input_reg] = true; + offset += vertex_attributes.GetStride(attribute_index); + } + + } else { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings respectively + offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + } + } + + const PAddr data_addr = base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); + const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; + const u32 data_size = loader.byte_count * vertex_num; + + res_cache.FlushRegion(data_addr, data_size, nullptr); + 
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size); + + // Create the binding associated with this loader + VertexBinding& binding = layout.bindings.at(layout.binding_count); + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(0); + binding.stride.Assign(loader.byte_count); + + // Keep track of the binding offsets so we can bind the vertex buffer later + binding_offsets[layout.binding_count++] = array_offset + buffer_offset; + array_ptr += data_size; + buffer_offset += data_size; + } + + // Reserve the last binding for fixed attributes + u32 offset = 0; + for (std::size_t i = 0; i < 16; i++) { + if (vertex_attributes.IsDefaultAttribute(i)) { + const u32 reg = regs.vs.GetRegisterForAttribute(i); + if (!enable_attributes[reg]) { + const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const std::array data = { + attr.x.ToFloat32(), + attr.y.ToFloat32(), + attr.z.ToFloat32(), + attr.w.ToFloat32() + }; + + // Copy the data to the end of the buffer + const u32 data_size = sizeof(float) * static_cast(data.size()); + std::memcpy(array_ptr, data.data(), data_size); + + // Define the binding. 
Note that the counter is not incremented + VertexBinding& binding = layout.bindings.at(layout.binding_count); + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(1); + binding.stride.Assign(offset); + + VertexAttribute& attribute = layout.attributes.at(layout.attribute_count++); + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(reg); + attribute.offset.Assign(offset); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(4); + + offset += data_size; + array_ptr += data_size; + binding_offsets[layout.binding_count] = array_offset + buffer_offset; + } + } + } + + pipeline_info.vertex_layout = layout; + vertex_buffer.Commit(vs_input_size); + + std::array buffers; + buffers.fill(vertex_buffer.GetHandle()); + + // Bind the vertex buffers with all the bindings + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindVertexBuffers(0, layout.binding_count, buffers.data(), binding_offsets.data()); +} + +bool RasterizerVulkan::SetupVertexShader() { + MICROPROFILE_SCOPE(OpenGL_VS); + return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs); +} + +bool RasterizerVulkan::SetupGeometryShader() { + MICROPROFILE_SCOPE(OpenGL_GS); + const auto& regs = Pica::g_state.regs; + + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); + return false; + } + + pipeline_cache.UseFixedGeometryShader(regs); + return true; +} + +bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { + return false; + } + if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { + return false; + } + } + + if (!SetupVertexShader()) { + return false; + } + + if (!SetupGeometryShader()) { + 
return false; + } + + return Draw(true, is_indexed); +} + +bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + + auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed); + + if (vs_input_size > VERTEX_BUFFER_SIZE) { + LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size); + return false; + } + + SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max); + pipeline_cache.BindPipeline(pipeline_info); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + if (is_indexed) { + bool index_u16 = regs.pipeline.index_array.format != 0; + const u64 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + + if (index_buffer_size > INDEX_BUFFER_SIZE) { + LOG_WARNING(Render_Vulkan, "Too large index input size {}", index_buffer_size); + return false; + } + + const u8* index_data = VideoCore::g_memory->GetPhysicalPointer( + regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + + regs.pipeline.index_array.offset); + + // Upload index buffer data to the GPU + auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size, 4); + std::memcpy(index_ptr, index_data, index_buffer_size); + index_buffer.Commit(index_buffer_size); + + vk::IndexType index_type = index_u16 ? 
vk::IndexType::eUint16 : vk::IndexType::eUint8EXT; + command_buffer.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type); + + // Submit draw + command_buffer.drawIndexed(regs.pipeline.num_vertices, 1, 0, 0, 0); + } else { + command_buffer.draw(regs.pipeline.num_vertices, 1, 0, 0); + } + + return true; +} + +void RasterizerVulkan::DrawTriangles() { + if (vertex_batch.empty()) { + return; + } + + Draw(false, false); +} + +bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { + MICROPROFILE_SCOPE(OpenGL_Drawing); + const auto& regs = Pica::g_state.regs; + + const bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow; + const bool has_stencil = + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask.Value(); + const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled(); + const bool using_color_fb = + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + const bool using_depth_fb = + !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && pipeline_info.depth_stencil.stencil_test_enable)); + + const auto viewport_rect_unscaled = Common::Rectangle{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.y // bottom + }; + + auto [color_surface, depth_surface, surfaces_rect] = + 
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + pipeline_info.color_attachment = + color_surface ? color_surface->pixel_format : VideoCore::PixelFormat::Invalid; + pipeline_info.depth_attachment = + depth_surface ? depth_surface->pixel_format : VideoCore::PixelFormat::Invalid; + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + const VideoCore::Rect2D draw_rect = { + static_cast(std::clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(std::clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top)) + }; + + // Sync the viewport + pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, + surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, + viewport_rect_unscaled.GetWidth() * res_scale, + viewport_rect_unscaled.GetHeight() * res_scale); + + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; + uniform_block_data.dirty = true; + } + + // Scissor checks are window-, not viewport-relative, which means that if the cached texture + // sub-rect changes, the scissor bounds also need to be updated. 
+ int scissor_x1 = static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + int scissor_y1 = static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); + + // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when + // scaling or doing multisampling. + int scissor_x2 = static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + int scissor_y2 = static_cast(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); + + if (uniform_block_data.data.scissor_x1 != scissor_x1 || + uniform_block_data.data.scissor_x2 != scissor_x2 || + uniform_block_data.data.scissor_y1 != scissor_y1 || + uniform_block_data.data.scissor_y2 != scissor_y2) { + + uniform_block_data.data.scissor_x1 = scissor_x1; + uniform_block_data.data.scissor_x2 = scissor_x2; + uniform_block_data.data.scissor_y1 = scissor_y1; + uniform_block_data.data.scissor_y2 = scissor_y2; + uniform_block_data.dirty = true; + } + + auto CheckBarrier = [this, &color_surface = color_surface](vk::ImageView image_view, u32 texture_index) { + if (color_surface && color_surface->alloc.image_view == image_view) { + //auto temp_tex = backend->CreateTexture(texture->GetInfo()); + //temp_tex->CopyFrom(texture); + pipeline_cache.BindTexture(texture_index, image_view); + } else { + pipeline_cache.BindTexture(texture_index, image_view); + } + }; + + const auto BindCubeFace = [&](Pica::TexturingRegs::CubeFace face, + Pica::Texture::TextureInfo& info) { + info.physical_address = regs.texturing.GetCubePhysicalAddress(face); + auto surface = res_cache.GetTextureSurface(info); + + const u32 binding = static_cast(face); + if (surface != nullptr) { + pipeline_cache.BindStorageImage(binding, surface->alloc.image_view); + } else { + pipeline_cache.BindStorageImage(binding, default_texture.image_view); + } + }; + + const auto BindSampler = [&](u32 binding, SamplerInfo& info, + const Pica::TexturingRegs::FullTextureConfig& 
texture) { + info = SamplerInfo{ + .mag_filter = texture.config.mag_filter, + .min_filter = texture.config.min_filter, + .mip_filter = texture.config.mip_filter, + .wrap_s = texture.config.wrap_s, + .wrap_t = texture.config.wrap_t, + .border_color = texture.config.border_color.raw, + .lod_min = texture.config.lod.min_level, + .lod_max = texture.config.lod.max_level, + .lod_bias = texture.config.lod.bias + }; + + // Search the cache and bind the appropriate sampler + if (auto it = samplers.find(info); it != samplers.end()) { + pipeline_cache.BindSampler(binding, it->second); + } else { + vk::Sampler texture_sampler = CreateSampler(info); + samplers.emplace(info, texture_sampler); + pipeline_cache.BindSampler(binding, texture_sampler); + } + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + // Sync and bind the texture surfaces + const auto pica_textures = regs.texturing.GetTextures(); + for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + const auto& texture = pica_textures[texture_index]; + + if (texture.enabled) { + if (texture_index == 0) { + using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; + switch (texture.config.type.Value()) { + case TextureType::Shadow2D: { + auto surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + pipeline_cache.BindStorageImage(0, surface->alloc.image_view); + } else { + pipeline_cache.BindStorageImage(0, default_texture.image_view); + } + continue; + } + case TextureType::ShadowCube: { + using CubeFace = Pica::TexturingRegs::CubeFace; + auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, + texture.format); + BindCubeFace(CubeFace::PositiveX, info); + BindCubeFace(CubeFace::NegativeX, info); + BindCubeFace(CubeFace::PositiveY, info); + BindCubeFace(CubeFace::NegativeY, info); + BindCubeFace(CubeFace::PositiveZ, info); + BindCubeFace(CubeFace::NegativeZ, info); + continue; + } + case 
TextureType::TextureCube: { + using CubeFace = Pica::TexturingRegs::CubeFace; + const VideoCore::TextureCubeConfig config = { + .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), + .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), + .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), + .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), + .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), + .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), + .width = texture.config.width, + .format = texture.format + }; + + auto surface = res_cache.GetTextureCube(config); + if (surface != nullptr) { + runtime.Transition(command_buffer, surface->alloc, + vk::ImageLayout::eShaderReadOnlyOptimal, + 0, surface->alloc.levels, 0, 6); + pipeline_cache.BindTexture(3, surface->alloc.image_view); + } else { + pipeline_cache.BindTexture(3, default_texture.image_view); + } + + BindSampler(3, texture_cube_sampler, texture); + continue; // Texture unit 0 setup finished. Continue to next unit + } + default: + break; + } + } + + // Update sampler key + BindSampler(texture_index, texture_samplers[texture_index], texture); + + auto surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + runtime.Transition(command_buffer, surface->alloc, + vk::ImageLayout::eShaderReadOnlyOptimal, + 0, surface->alloc.levels); + CheckBarrier(surface->alloc.image_view, texture_index); + } else { + // Can occur when texture addr is null or its memory is unmapped/invalid + // HACK: In this case, the correct behaviour for the PICA is to use the last + // rendered colour. But because this would be impractical to implement, the + // next best alternative is to use a clear texture, essentially skipping + // the geometry in question. + // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn + // on the male character's face, which in the OpenGL default appear black. 
+ //state.texture_units[texture_index].texture_2d = default_texture; + pipeline_cache.BindTexture(texture_index, default_texture.image_view); + } + } else { + pipeline_cache.BindTexture(texture_index, default_texture.image_view); + pipeline_cache.BindSampler(texture_index, default_sampler); + } + } + + // Sync and bind the shader + if (shader_dirty) { + SetShader(); + shader_dirty = false; + } + + // Sync the LUTs within the texture buffer + SyncAndUploadLUTs(); + SyncAndUploadLUTsLF(); + + // Sync the uniform data + UploadUniforms(accelerate); + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. + // Enable scissor test to prevent drawing outside of the framebuffer region + pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(), draw_rect.GetHeight()); + + auto valid_surface = color_surface ? color_surface : depth_surface; + const FramebufferInfo framebuffer_info = { + .color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE, + .depth = depth_surface ? 
depth_surface->alloc.image_view : VK_NULL_HANDLE, + .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .width = valid_surface->GetScaledWidth(), + .height = valid_surface->GetScaledHeight() + }; + + auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{}); + if (new_framebuffer) { + it->second = CreateFramebuffer(framebuffer_info); + } + + if (color_surface) { + runtime.Transition(command_buffer, color_surface->alloc, + vk::ImageLayout::eColorAttachmentOptimal, + 0, color_surface->alloc.levels); + } + + if (depth_surface) { + runtime.Transition(command_buffer, depth_surface->alloc, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + 0, depth_surface->alloc.levels); + } + + const vk::RenderPassBeginInfo renderpass_begin = { + .renderPass = + renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .framebuffer = it->second, + .renderArea = vk::Rect2D{ + .offset = {static_cast(draw_rect.left), static_cast(draw_rect.bottom)}, + .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()} + }, + + .clearValueCount = 0, + .pClearValues = nullptr + }; + + renderpass_cache.EnterRenderpass(renderpass_begin); + + // Draw the vertex batch + bool succeeded = true; + if (accelerate) { + succeeded = AccelerateDrawBatchInternal(is_indexed); + } else { + pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List); + pipeline_cache.UseTrivialVertexShader(); + pipeline_cache.UseTrivialGeometryShader(); + pipeline_cache.BindPipeline(pipeline_info); + + // Bind the vertex buffer at the current mapped offset. This effectively means + // that when base_vertex is zero the GPU will start drawing from the current mapped + // offset not the start of the buffer. 
+ vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), vertex_buffer.GetBufferOffset()); + + const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); + const u32 batch_size = static_cast(vertex_batch.size()); + for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { + const u32 vertices = std::min(max_vertices, batch_size - base_vertex); + const u32 vertex_size = vertices * sizeof(HardwareVertex); + + // Copy vertex data + auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); + std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); + vertex_buffer.Commit(vertex_size); + + command_buffer.draw(vertices, 1, base_vertex, 0); + } + } + + renderpass_cache.ExitRenderpass(); + + vertex_batch.clear(); + + // Mark framebuffer surfaces as dirty + const VideoCore::Rect2D draw_rect_unscaled = { + draw_rect.left / res_scale, + draw_rect.top / res_scale, + draw_rect.right / res_scale, + draw_rect.bottom / res_scale + }; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } + + return succeeded; +} + +void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) { + const auto& regs = Pica::g_state.regs; + + switch (id) { + // Culling + case PICA_REG_INDEX(rasterizer.cull_mode): + SyncCullMode(); + break; + + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + case PICA_REG_INDEX(rasterizer.clip_coef[0]): + case PICA_REG_INDEX(rasterizer.clip_coef[1]): + case 
PICA_REG_INDEX(rasterizer.clip_coef[2]): + case PICA_REG_INDEX(rasterizer.clip_coef[3]): + SyncClipCoef(); + break; + + // Depth modifiers + case PICA_REG_INDEX(rasterizer.viewport_depth_range): + SyncDepthScale(); + break; + case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane): + SyncDepthOffset(); + break; + + // Depth buffering + case PICA_REG_INDEX(rasterizer.depthmap_enable): + shader_dirty = true; + break; + + // Blending + case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): + SyncBlendEnabled(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): + SyncBlendFuncs(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.blend_const): + SyncBlendColor(); + break; + + // Shadow texture + case PICA_REG_INDEX(texturing.shadow): + SyncShadowTextureBias(); + break; + + // Fog state + case PICA_REG_INDEX(texturing.fog_color): + SyncFogColor(); + break; + case PICA_REG_INDEX(texturing.fog_lut_data[0]): + case PICA_REG_INDEX(texturing.fog_lut_data[1]): + case PICA_REG_INDEX(texturing.fog_lut_data[2]): + case PICA_REG_INDEX(texturing.fog_lut_data[3]): + case PICA_REG_INDEX(texturing.fog_lut_data[4]): + case PICA_REG_INDEX(texturing.fog_lut_data[5]): + case PICA_REG_INDEX(texturing.fog_lut_data[6]): + case PICA_REG_INDEX(texturing.fog_lut_data[7]): + uniform_block_data.fog_lut_dirty = true; + break; + + // ProcTex state + case PICA_REG_INDEX(texturing.proctex): + case PICA_REG_INDEX(texturing.proctex_lut): + case PICA_REG_INDEX(texturing.proctex_lut_offset): + SyncProcTexBias(); + shader_dirty = true; + break; + + case PICA_REG_INDEX(texturing.proctex_noise_u): + case PICA_REG_INDEX(texturing.proctex_noise_v): + case PICA_REG_INDEX(texturing.proctex_noise_frequency): + SyncProcTexNoise(); + break; + + case PICA_REG_INDEX(texturing.proctex_lut_data[0]): + case PICA_REG_INDEX(texturing.proctex_lut_data[1]): + case PICA_REG_INDEX(texturing.proctex_lut_data[2]): + case PICA_REG_INDEX(texturing.proctex_lut_data[3]): + case 
PICA_REG_INDEX(texturing.proctex_lut_data[4]): + case PICA_REG_INDEX(texturing.proctex_lut_data[5]): + case PICA_REG_INDEX(texturing.proctex_lut_data[6]): + case PICA_REG_INDEX(texturing.proctex_lut_data[7]): + using Pica::TexturingRegs; + switch (regs.texturing.proctex_lut_config.ref_table.Value()) { + case TexturingRegs::ProcTexLutTable::Noise: + uniform_block_data.proctex_noise_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorMap: + uniform_block_data.proctex_color_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::AlphaMap: + uniform_block_data.proctex_alpha_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::Color: + uniform_block_data.proctex_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorDiff: + uniform_block_data.proctex_diff_lut_dirty = true; + break; + } + break; + + // Alpha test + case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): + SyncAlphaTest(); + shader_dirty = true; + break; + + // Sync GL stencil test + stencil write mask + // (Pica stencil test function register also contains a stencil write mask) + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): + SyncStencilTest(); + SyncStencilWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): + case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): + SyncStencilTest(); + break; + + // Sync GL depth test + depth and color write mask + // (Pica depth test function register also contains a depth and color write mask) + case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): + SyncDepthTest(); + SyncDepthWriteMask(); + SyncColorWriteMask(); + break; + + // Sync GL depth and stencil write mask + // (This is a dedicated combined depth / stencil write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): + SyncDepthWriteMask(); + SyncStencilWriteMask(); + break; + + // Sync GL color write mask + // (This is a dedicated 
color write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): + SyncColorWriteMask(); + break; + + case PICA_REG_INDEX(framebuffer.shadow): + SyncShadowBias(); + break; + + // Scissor test + case PICA_REG_INDEX(rasterizer.scissor_test.mode): + shader_dirty = true; + break; + + // Logic op + case PICA_REG_INDEX(framebuffer.output_merger.logic_op): + SyncLogicOp(); + break; + + case PICA_REG_INDEX(texturing.main_config): + shader_dirty = true; + break; + + // Texture 0 type + case PICA_REG_INDEX(texturing.texture0.type): + shader_dirty = true; + break; + + // TEV stages + // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) + case PICA_REG_INDEX(texturing.tev_stage0.color_source1): + case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage0.color_op): + case PICA_REG_INDEX(texturing.tev_stage0.color_scale): + case PICA_REG_INDEX(texturing.tev_stage1.color_source1): + case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage1.color_op): + case PICA_REG_INDEX(texturing.tev_stage1.color_scale): + case PICA_REG_INDEX(texturing.tev_stage2.color_source1): + case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage2.color_op): + case PICA_REG_INDEX(texturing.tev_stage2.color_scale): + case PICA_REG_INDEX(texturing.tev_stage3.color_source1): + case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage3.color_op): + case PICA_REG_INDEX(texturing.tev_stage3.color_scale): + case PICA_REG_INDEX(texturing.tev_stage4.color_source1): + case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage4.color_op): + case PICA_REG_INDEX(texturing.tev_stage4.color_scale): + case PICA_REG_INDEX(texturing.tev_stage5.color_source1): + case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1): + case 
PICA_REG_INDEX(texturing.tev_stage5.color_op): + case PICA_REG_INDEX(texturing.tev_stage5.color_scale): + case PICA_REG_INDEX(texturing.tev_combiner_buffer_input): + shader_dirty = true; + break; + case PICA_REG_INDEX(texturing.tev_stage0.const_r): + SyncTevConstColor(0, regs.texturing.tev_stage0); + break; + case PICA_REG_INDEX(texturing.tev_stage1.const_r): + SyncTevConstColor(1, regs.texturing.tev_stage1); + break; + case PICA_REG_INDEX(texturing.tev_stage2.const_r): + SyncTevConstColor(2, regs.texturing.tev_stage2); + break; + case PICA_REG_INDEX(texturing.tev_stage3.const_r): + SyncTevConstColor(3, regs.texturing.tev_stage3); + break; + case PICA_REG_INDEX(texturing.tev_stage4.const_r): + SyncTevConstColor(4, regs.texturing.tev_stage4); + break; + case PICA_REG_INDEX(texturing.tev_stage5.const_r): + SyncTevConstColor(5, regs.texturing.tev_stage5); + break; + + // TEV combiner buffer color + case PICA_REG_INDEX(texturing.tev_combiner_buffer_color): + SyncCombinerColor(); + break; + + // Fragment lighting switches + case PICA_REG_INDEX(lighting.disable): + case PICA_REG_INDEX(lighting.max_light_index): + case PICA_REG_INDEX(lighting.config0): + case PICA_REG_INDEX(lighting.config1): + case PICA_REG_INDEX(lighting.abs_lut_input): + case PICA_REG_INDEX(lighting.lut_input): + case PICA_REG_INDEX(lighting.lut_scale): + case PICA_REG_INDEX(lighting.light_enable): + break; + + // Fragment lighting specular 0 color + case PICA_REG_INDEX(lighting.light[0].specular_0): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX(lighting.light[1].specular_0): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX(lighting.light[2].specular_0): + SyncLightSpecular0(2); + break; + case PICA_REG_INDEX(lighting.light[3].specular_0): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX(lighting.light[4].specular_0): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX(lighting.light[5].specular_0): + SyncLightSpecular0(5); + break; + case 
PICA_REG_INDEX(lighting.light[6].specular_0): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX(lighting.light[7].specular_0): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX(lighting.light[0].specular_1): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX(lighting.light[1].specular_1): + SyncLightSpecular1(1); + break; + case PICA_REG_INDEX(lighting.light[2].specular_1): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX(lighting.light[3].specular_1): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX(lighting.light[4].specular_1): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX(lighting.light[5].specular_1): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX(lighting.light[6].specular_1): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX(lighting.light[7].specular_1): + SyncLightSpecular1(7); + break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX(lighting.light[0].diffuse): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX(lighting.light[1].diffuse): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX(lighting.light[2].diffuse): + SyncLightDiffuse(2); + break; + case PICA_REG_INDEX(lighting.light[3].diffuse): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX(lighting.light[4].diffuse): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX(lighting.light[5].diffuse): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX(lighting.light[6].diffuse): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX(lighting.light[7].diffuse): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX(lighting.light[0].ambient): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX(lighting.light[1].ambient): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX(lighting.light[2].ambient): + SyncLightAmbient(2); + break; + case PICA_REG_INDEX(lighting.light[3].ambient): + SyncLightAmbient(3); + break; + case 
PICA_REG_INDEX(lighting.light[4].ambient): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX(lighting.light[5].ambient): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX(lighting.light[6].ambient): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX(lighting.light[7].ambient): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX(lighting.light[0].x): + case PICA_REG_INDEX(lighting.light[0].z): + SyncLightPosition(0); + break; + case PICA_REG_INDEX(lighting.light[1].x): + case PICA_REG_INDEX(lighting.light[1].z): + SyncLightPosition(1); + break; + case PICA_REG_INDEX(lighting.light[2].x): + case PICA_REG_INDEX(lighting.light[2].z): + SyncLightPosition(2); + break; + case PICA_REG_INDEX(lighting.light[3].x): + case PICA_REG_INDEX(lighting.light[3].z): + SyncLightPosition(3); + break; + case PICA_REG_INDEX(lighting.light[4].x): + case PICA_REG_INDEX(lighting.light[4].z): + SyncLightPosition(4); + break; + case PICA_REG_INDEX(lighting.light[5].x): + case PICA_REG_INDEX(lighting.light[5].z): + SyncLightPosition(5); + break; + case PICA_REG_INDEX(lighting.light[6].x): + case PICA_REG_INDEX(lighting.light[6].z): + SyncLightPosition(6); + break; + case PICA_REG_INDEX(lighting.light[7].x): + case PICA_REG_INDEX(lighting.light[7].z): + SyncLightPosition(7); + break; + + // Fragment spot lighting direction + case PICA_REG_INDEX(lighting.light[0].spot_x): + case PICA_REG_INDEX(lighting.light[0].spot_z): + SyncLightSpotDirection(0); + break; + case PICA_REG_INDEX(lighting.light[1].spot_x): + case PICA_REG_INDEX(lighting.light[1].spot_z): + SyncLightSpotDirection(1); + break; + case PICA_REG_INDEX(lighting.light[2].spot_x): + case PICA_REG_INDEX(lighting.light[2].spot_z): + SyncLightSpotDirection(2); + break; + case PICA_REG_INDEX(lighting.light[3].spot_x): + case PICA_REG_INDEX(lighting.light[3].spot_z): + SyncLightSpotDirection(3); + break; + case PICA_REG_INDEX(lighting.light[4].spot_x): + case 
PICA_REG_INDEX(lighting.light[4].spot_z): + SyncLightSpotDirection(4); + break; + case PICA_REG_INDEX(lighting.light[5].spot_x): + case PICA_REG_INDEX(lighting.light[5].spot_z): + SyncLightSpotDirection(5); + break; + case PICA_REG_INDEX(lighting.light[6].spot_x): + case PICA_REG_INDEX(lighting.light[6].spot_z): + SyncLightSpotDirection(6); + break; + case PICA_REG_INDEX(lighting.light[7].spot_x): + case PICA_REG_INDEX(lighting.light[7].spot_z): + SyncLightSpotDirection(7); + break; + + // Fragment lighting light source config + case PICA_REG_INDEX(lighting.light[0].config): + case PICA_REG_INDEX(lighting.light[1].config): + case PICA_REG_INDEX(lighting.light[2].config): + case PICA_REG_INDEX(lighting.light[3].config): + case PICA_REG_INDEX(lighting.light[4].config): + case PICA_REG_INDEX(lighting.light[5].config): + case PICA_REG_INDEX(lighting.light[6].config): + case PICA_REG_INDEX(lighting.light[7].config): + shader_dirty = true; + break; + + // Fragment lighting distance attenuation bias + case PICA_REG_INDEX(lighting.light[0].dist_atten_bias): + SyncLightDistanceAttenuationBias(0); + break; + case PICA_REG_INDEX(lighting.light[1].dist_atten_bias): + SyncLightDistanceAttenuationBias(1); + break; + case PICA_REG_INDEX(lighting.light[2].dist_atten_bias): + SyncLightDistanceAttenuationBias(2); + break; + case PICA_REG_INDEX(lighting.light[3].dist_atten_bias): + SyncLightDistanceAttenuationBias(3); + break; + case PICA_REG_INDEX(lighting.light[4].dist_atten_bias): + SyncLightDistanceAttenuationBias(4); + break; + case PICA_REG_INDEX(lighting.light[5].dist_atten_bias): + SyncLightDistanceAttenuationBias(5); + break; + case PICA_REG_INDEX(lighting.light[6].dist_atten_bias): + SyncLightDistanceAttenuationBias(6); + break; + case PICA_REG_INDEX(lighting.light[7].dist_atten_bias): + SyncLightDistanceAttenuationBias(7); + break; + + // Fragment lighting distance attenuation scale + case PICA_REG_INDEX(lighting.light[0].dist_atten_scale): + 
SyncLightDistanceAttenuationScale(0); + break; + case PICA_REG_INDEX(lighting.light[1].dist_atten_scale): + SyncLightDistanceAttenuationScale(1); + break; + case PICA_REG_INDEX(lighting.light[2].dist_atten_scale): + SyncLightDistanceAttenuationScale(2); + break; + case PICA_REG_INDEX(lighting.light[3].dist_atten_scale): + SyncLightDistanceAttenuationScale(3); + break; + case PICA_REG_INDEX(lighting.light[4].dist_atten_scale): + SyncLightDistanceAttenuationScale(4); + break; + case PICA_REG_INDEX(lighting.light[5].dist_atten_scale): + SyncLightDistanceAttenuationScale(5); + break; + case PICA_REG_INDEX(lighting.light[6].dist_atten_scale): + SyncLightDistanceAttenuationScale(6); + break; + case PICA_REG_INDEX(lighting.light[7].dist_atten_scale): + SyncLightDistanceAttenuationScale(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case PICA_REG_INDEX(lighting.global_ambient): + SyncGlobalAmbient(); + break; + + // Fragment lighting lookup tables + case PICA_REG_INDEX(lighting.lut_data[0]): + case PICA_REG_INDEX(lighting.lut_data[1]): + case PICA_REG_INDEX(lighting.lut_data[2]): + case PICA_REG_INDEX(lighting.lut_data[3]): + case PICA_REG_INDEX(lighting.lut_data[4]): + case PICA_REG_INDEX(lighting.lut_data[5]): + case PICA_REG_INDEX(lighting.lut_data[6]): + case PICA_REG_INDEX(lighting.lut_data[7]): { + const auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lighting_lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty_any = true; + break; + } + } +} + +void RasterizerVulkan::FlushAll() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushAll(); +} + +void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +void 
RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + MICROPROFILE_SCOPE(OpenGL_Blits); + + VideoCore::SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.width = config.output_width; + src_params.stride = config.input_width; + src_params.height = config.output_height; + src_params.is_tiled = !config.input_linear; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); + + VideoCore::SurfaceParams dst_params; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 + : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? 
config.output_height.Value() / 2 + : config.output_height.Value(); + dst_params.is_tiled = config.input_linear != config.dont_swizzle; + dst_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); + + auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + if (src_surface == nullptr) + return false; + + dst_params.res_scale = src_surface->res_scale; + + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, false); + if (dst_surface == nullptr) { + return false; + } + + if (src_surface->is_tiled != dst_surface->is_tiled) + std::swap(src_rect.top, src_rect.bottom); + + if (config.flip_vertically) + std::swap(src_rect.top, src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + return false; + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} + +bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + if (copy_size == 0) { + return false; + } + + u32 input_gap = config.texture_copy.input_gap * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (input_width == 0 && input_gap != 0) { + return false; + } + if (input_gap == 0 || input_width >= copy_size) { + input_width = copy_size; + input_gap = 0; + } + if (copy_size % input_width != 0) { + return false; + } + + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (output_width == 0 && output_gap != 0) { + return false; + } + if (output_gap == 0 || output_width >= copy_size) { + output_width = copy_size; + output_gap = 0; + } + if (copy_size % output_width != 0) { + return false; + } + + VideoCore::SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + 
src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = copy_size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + auto [src_surface, src_rect] = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) { + return false; + } + + if (output_gap != 0 && + (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * + (src_surface->is_tiled ? 8 : 1) || + output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { + return false; + } + + VideoCore::SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = src_rect.GetWidth() / src_surface->res_scale; + dst_params.stride = dst_params.width + src_surface->PixelsInBytes( + src_surface->is_tiled ? output_gap / 8 : output_gap); + dst_params.height = src_rect.GetHeight() / src_surface->res_scale; + dst_params.res_scale = src_surface->res_scale; + dst_params.UpdateParams(); + + // Since we are going to invalidate the gap if there is one, we will have to load it first + const bool load_gap = output_gap != 0; + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap); + if (dst_surface == nullptr) { + return false; + } + + if (dst_surface->type == VideoCore::SurfaceType::Texture) { + return false; + } + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { + return false; + } + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} + +bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + auto dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) + return false; + + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); 
+ return true; +} + +bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + if (framebuffer_addr == 0) { + return false; + } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + + VideoCore::SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(config.width.Value(), pixel_stride); + src_params.height = config.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); + + const auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + + if (src_surface == nullptr) { + return false; + } + + u32 scaled_width = src_surface->GetScaledWidth(); + u32 scaled_height = src_surface->GetScaledHeight(); + + screen_info.display_texcoords = Common::Rectangle( + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.display_texture = &src_surface->alloc; + + return true; +} + +vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) { + auto properties = instance.GetPhysicalDevice().getProperties(); + const vk::SamplerCreateInfo sampler_info = { + .magFilter = PicaToVK::TextureFilterMode(info.mag_filter), + .minFilter = PicaToVK::TextureFilterMode(info.min_filter), + .mipmapMode = PicaToVK::TextureMipFilterMode(info.mip_filter), + .addressModeU = PicaToVK::WrapMode(info.wrap_s), + .addressModeV = PicaToVK::WrapMode(info.wrap_t), + .mipLodBias = info.lod_bias / 256.0f, + .anisotropyEnable = true, + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .minLod = static_cast(info.lod_min), + .maxLod = static_cast(info.lod_max), + 
.borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false + }; + + vk::Device device = instance.GetDevice(); + return device.createSampler(sampler_info); +} + +vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info) { + u32 attachment_count = 0; + std::array attachments; + + if (info.color) { + attachments[attachment_count++] = info.color; + } + + if (info.depth) { + attachments[attachment_count++] = info.depth; + } + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = info.renderpass, + .attachmentCount = attachment_count, + .pAttachments = attachments.data(), + .width = info.width, + .height = info.height, + .layers = 1 + }; + + vk::Device device = instance.GetDevice(); + return device.createFramebuffer(framebuffer_info); +} + +void RasterizerVulkan::FlushBuffers() { + vertex_buffer.Flush(); + uniform_buffer.Flush(); + index_buffer.Flush(); + texture_buffer.Flush(); + texture_lf_buffer.Flush(); +} + +void RasterizerVulkan::SetShader() { + pipeline_cache.UseFragmentShader(Pica::g_state.regs); +} + +void RasterizerVulkan::SyncClipEnabled() { + uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0; +} + +void RasterizerVulkan::SyncClipCoef() { + const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); + const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + if (new_clip_coef != uniform_block_data.data.clip_coef) { + uniform_block_data.data.clip_coef = new_clip_coef; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncCullMode() { + const auto& regs = Pica::g_state.regs; + if (instance.IsExtendedDynamicStateSupported()) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setCullModeEXT(PicaToVK::CullMode(regs.rasterizer.cull_mode)); + 
command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(regs.rasterizer.cull_mode)); + } + + pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); +} + +void RasterizerVulkan::SyncDepthScale() { + float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32(); + + if (depth_scale != uniform_block_data.data.depth_scale) { + uniform_block_data.data.depth_scale = depth_scale; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncDepthOffset() { + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32(); + + if (depth_offset != uniform_block_data.data.depth_offset) { + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncBlendEnabled() { + pipeline_info.blending.blend_enable.Assign(Pica::g_state.regs.framebuffer.output_merger.alphablend_enable); +} + +void RasterizerVulkan::SyncBlendFuncs() { + const auto& regs = Pica::g_state.regs; + + pipeline_info.blending.color_blend_eq.Assign(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + pipeline_info.blending.alpha_blend_eq.Assign(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + pipeline_info.blending.src_color_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + pipeline_info.blending.dst_color_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + pipeline_info.blending.src_alpha_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_source_a); + pipeline_info.blending.dst_alpha_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerVulkan::SyncBlendColor() { + auto blend_color = + PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + 
command_buffer.setBlendConstants(blend_color.AsArray()); +} + +void RasterizerVulkan::SyncFogColor() { + const auto& regs = Pica::g_state.regs; + uniform_block_data.data.fog_color = { + regs.texturing.fog_color.r.Value() / 255.0f, + regs.texturing.fog_color.g.Value() / 255.0f, + regs.texturing.fog_color.b.Value() / 255.0f, + }; + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncProcTexNoise() { + const auto& regs = Pica::g_state.regs.texturing; + uniform_block_data.data.proctex_noise_f = { + Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), + }; + uniform_block_data.data.proctex_noise_a = { + regs.proctex_noise_u.amplitude / 4095.0f, + regs.proctex_noise_v.amplitude / 4095.0f, + }; + uniform_block_data.data.proctex_noise_p = { + Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(), + }; + + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncProcTexBias() { + const auto& regs = Pica::g_state.regs.texturing; + uniform_block_data.data.proctex_bias = + Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8)) + .ToFloat32(); + + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncAlphaTest() { + const auto& regs = Pica::g_state.regs; + if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { + uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLogicOp() { + const auto& regs = Pica::g_state.regs; + pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op); +} + +void RasterizerVulkan::SyncColorWriteMask() { + const auto& regs = Pica::g_state.regs; + const u32 color_mask = (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF; + 
pipeline_info.blending.color_write_mask.Assign(color_mask); +} + +void RasterizerVulkan::SyncStencilWriteMask() { + const auto& regs = Pica::g_state.regs; + pipeline_info.depth_stencil.stencil_write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, pipeline_info.depth_stencil.stencil_write_mask); +} + +void RasterizerVulkan::SyncDepthWriteMask() { + const auto& regs = Pica::g_state.regs; + + const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); + + if (instance.IsExtendedDynamicStateSupported()) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setDepthWriteEnableEXT(write_enable); + } + + pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable); +} + +void RasterizerVulkan::SyncStencilTest() { + const auto& regs = Pica::g_state.regs; + + const bool test_enable = regs.framebuffer.output_merger.stencil_test.enable && + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const auto& stencil_test = regs.framebuffer.output_merger.stencil_test; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, stencil_test.input_mask); + command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, stencil_test.reference_value); + + if (instance.IsExtendedDynamicStateSupported()) { + command_buffer.setStencilTestEnableEXT(test_enable); + command_buffer.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(stencil_test.action_stencil_fail), + PicaToVK::StencilOp(stencil_test.action_depth_pass), + 
PicaToVK::StencilOp(stencil_test.action_depth_fail), + PicaToVK::CompareFunc(stencil_test.func)); + } + + pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable); + pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail); + pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass); + pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail); + pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func); + pipeline_info.depth_stencil.stencil_reference = stencil_test.reference_value; + pipeline_info.depth_stencil.stencil_write_mask = stencil_test.input_mask; +} + +void RasterizerVulkan::SyncDepthTest() { + const auto& regs = Pica::g_state.regs; + + const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1 + ? regs.framebuffer.output_merger.depth_test_func.Value() + : Pica::FramebufferRegs::CompareFunc::Always; + + if (instance.IsExtendedDynamicStateSupported()) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setDepthCompareOpEXT(PicaToVK::CompareFunc(compare_op)); + command_buffer.setDepthTestEnableEXT(test_enabled); + } + + pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled); + pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op); +} + +void RasterizerVulkan::SyncCombinerColor() { + auto combiner_color = PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); + if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { + uniform_block_data.data.tev_combiner_buffer_color = combiner_color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index, + const Pica::TexturingRegs::TevStageConfig& tev_stage) { + const auto const_color = 
PicaToVK::ColorRGBA8(tev_stage.const_color); + + if (const_color == uniform_block_data.data.const_color[stage_index]) { + return; + } + + uniform_block_data.data.const_color[stage_index] = const_color; + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncGlobalAmbient() { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpecular0(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpecular1(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightDiffuse(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightAmbient(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightPosition(int light_index) { + const Common::Vec3f position = { 
+ Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() + }; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpotDirection(int light_index) { + const auto& light = Pica::g_state.regs.lighting.light[light_index]; + const auto spot_direction = Common::Vec3f{light.spot_x, light.spot_y, light.spot_z} / 2047.0f; + + if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { + uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { + float dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) + .ToFloat32(); + + if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { + uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { + float dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) + .ToFloat32(); + + if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { + uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncShadowBias() { + const auto& shadow = Pica::g_state.regs.framebuffer.shadow; + float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); + float linear = 
Pica::float16::FromRaw(shadow.linear).ToFloat32(); + + if (constant != uniform_block_data.data.shadow_bias_constant || + linear != uniform_block_data.data.shadow_bias_linear) { + uniform_block_data.data.shadow_bias_constant = constant; + uniform_block_data.data.shadow_bias_linear = linear; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncShadowTextureBias() { + int bias = Pica::g_state.regs.texturing.shadow.bias << 1; + if (bias != uniform_block_data.data.shadow_texture_bias) { + uniform_block_data.data.shadow_texture_bias = bias; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncAndUploadLUTsLF() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(Common::Vec2f) * 128; // fog + + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // Sync the lighting luts + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index] || invalidate) { + lighting_lut_data[index] = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + 
uniform_block_data.lighting_lut_dirty[index] = false; + } + } + uniform_block_data.lighting_lut_dirty_any = false; + } + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data || invalidate) { + fog_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.fog_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.fog_lut_dirty = false; + } + + texture_lf_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::SyncAndUploadLUTs() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha + sizeof(Common::Vec4f) * 256 + // proctex + sizeof(Common::Vec4f) * 256; // proctex diff + + if (!uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto SyncProcTexValueLUT = [this, &buffer = buffer, &offset = offset, &invalidate = invalidate, &bytes_used]( + const std::array& lut, + std::array& lut_data, int& lut_offset) { + std::array new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data || invalidate) { + lut_data = 
new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + }; + + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.proctex.color_table.begin(), + Pica::g_state.proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data || invalidate) { + proctex_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex 
difference lut + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.proctex.color_diff_table.begin(), + Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data || invalidate) { + proctex_diff_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_diff_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_diff_lut_dirty = false; + } + + texture_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { + const bool sync_vs = accelerate_draw; + const bool sync_fs = uniform_block_data.dirty; + + if (!sync_vs && !sync_fs) { + return; + } + + u32 used_bytes = 0; + const u32 uniform_size = static_cast(uniform_size_aligned_vs + uniform_size_aligned_fs); + auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, + static_cast(uniform_buffer_alignment)); + + if (sync_vs) { + Pica::Shader::VSUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); + std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); + + pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); + used_bytes += static_cast(uniform_size_aligned_vs); + } + + if (sync_fs || invalidate) { + std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(Pica::Shader::UniformData)); + + pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes, + sizeof(uniform_block_data.data)); + uniform_block_data.dirty = false; + used_bytes += 
static_cast(uniform_size_aligned_fs); + } + + uniform_buffer.Commit(used_bytes); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 000000000..1a58b8f51 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,317 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/vector_math.h" +#include "core/hw/gpu.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/regs_lighting.h" +#include "video_core/regs_texturing.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_uniforms.h" + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +struct ScreenInfo; + +class Instance; +class TaskScheduler; +class RenderpassCache; + +struct SamplerInfo { + using TextureConfig = Pica::TexturingRegs::TextureConfig; + TextureConfig::TextureFilter mag_filter; + TextureConfig::TextureFilter min_filter; + TextureConfig::TextureFilter mip_filter; + TextureConfig::WrapMode wrap_s; + TextureConfig::WrapMode wrap_t; + u32 border_color = 0; + u32 lod_min = 0; + u32 lod_max = 0; + s32 lod_bias = 0; + + // TODO(wwylele): remove this once mipmap for cube is implemented + bool supress_mipmap_for_cube = false; + + auto operator<=>(const SamplerInfo&) const noexcept = default; +}; + +struct FramebufferInfo { + vk::ImageView color; + vk::ImageView depth; + vk::RenderPass renderpass; + u32 width = 1; + u32 height = 1; + + auto operator<=>(const FramebufferInfo&) const noexcept = default; +}; + +} + +namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::SamplerInfo& info) const noexcept { + return 
Common::ComputeHash64(&info, sizeof(Vulkan::SamplerInfo)); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::FramebufferInfo& info) const noexcept { + return Common::ComputeHash64(&info, sizeof(Vulkan::FramebufferInfo)); + } +}; +} // namespace std + +namespace Vulkan { + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { + friend class RendererVulkan; +public: + explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler, + TextureRuntime& runtime, RenderpassCache& renderpass_cache); + ~RasterizerVulkan() override; + + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; + void DrawTriangles() override; + void NotifyPicaRegisterChanged(u32 id) override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info); + bool AccelerateDrawBatch(bool is_indexed) override; + + /// Syncs entire status to match PICA registers + void SyncEntireState() override; + + /// Sync fixed function pipeline state + void SyncFixedState(); + + /// Flushes all rasterizer owned buffers + void FlushBuffers(); + +private: + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA 
register + void SyncClipCoef(); + + /// Sets the OpenGL shader in accordance with the current PICA register state + void SetShader(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the depth scale to match the PICA register + void SyncDepthScale(); + + /// Syncs the depth offset to match the PICA register + void SyncDepthOffset(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the PICA register + void SyncBlendFuncs(); + + /// Syncs the blend color to match the PICA register + void SyncBlendColor(); + + /// Syncs the fog states to match the PICA register + void SyncFogColor(); + + /// Sync the procedural texture noise configuration to match the PICA register + void SyncProcTexNoise(); + + /// Sync the procedural texture bias configuration to match the PICA register + void SyncProcTexBias(); + + /// Syncs the alpha test states to match the PICA register + void SyncAlphaTest(); + + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + + /// Syncs the color write mask to match the PICA register state + void SyncColorWriteMask(); + + /// Syncs the stencil write mask to match the PICA register state + void SyncStencilWriteMask(); + + /// Syncs the depth write mask to match the PICA register state + void SyncDepthWriteMask(); + + /// Syncs the stencil test states to match the PICA register + void SyncStencilTest(); + + /// Syncs the depth test states to match the PICA register + void SyncDepthTest(); + + /// Syncs the TEV combiner color buffer to match the PICA register + void SyncCombinerColor(); + + /// Syncs the TEV constant color to match the PICA register + void SyncTevConstColor(std::size_t tev_index, + const Pica::TexturingRegs::TevStageConfig& tev_stage); + + /// Syncs the lighting global ambient color to match the PICA register + void SyncGlobalAmbient(); + + /// Syncs the specified light's specular 0 
color to match the PICA register + void SyncLightSpecular0(int light_index); + + /// Syncs the specified light's specular 1 color to match the PICA register + void SyncLightSpecular1(int light_index); + + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); + + /// Syncs the specified light's ambient color to match the PICA register + void SyncLightAmbient(int light_index); + + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); + + /// Syncs the specified spot light direcition to match the PICA register + void SyncLightSpotDirection(int light_index); + + /// Syncs the specified light's distance attenuation bias to match the PICA register + void SyncLightDistanceAttenuationBias(int light_index); + + /// Syncs the specified light's distance attenuation scale to match the PICA register + void SyncLightDistanceAttenuationScale(int light_index); + + /// Syncs the shadow rendering bias to match the PICA register + void SyncShadowBias(); + + /// Syncs the shadow texture bias to match the PICA register + void SyncShadowTextureBias(); + + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + void SyncAndUploadLUTsLF(); + + /// Upload the uniform blocks to the uniform buffer object + void UploadUniforms(bool accelerate_draw); + + /// Generic draw function for DrawTriangles and AccelerateDrawBatch + bool Draw(bool accelerate, bool is_indexed); + + /// Internal implementation for AccelerateDrawBatch + bool AccelerateDrawBatchInternal(bool is_indexed); + + struct VertexArrayInfo { + u32 vs_input_index_min; + u32 vs_input_index_max; + u32 vs_input_size; + }; + + /// Retrieve the range and the size of the input vertex + VertexArrayInfo AnalyzeVertexArray(bool is_indexed); + + /// Setup vertex array for AccelerateDrawBatch + void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max); + + /// Setup 
vertex shader for AccelerateDrawBatch + bool SetupVertexShader(); + + /// Setup geometry shader for AccelerateDrawBatch + bool SetupGeometryShader(); + + /// Creates a new sampler object + vk::Sampler CreateSampler(const SamplerInfo& info); + + /// Creates a new Vulkan framebuffer object + vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info); + +private: + const Instance& instance; + TaskScheduler& scheduler; + TextureRuntime& runtime; + RenderpassCache& renderpass_cache; + RasterizerCache res_cache; + PipelineCache pipeline_cache; + bool shader_dirty = true; + + /// Structure that the hardware rendered vertices are composed of + struct HardwareVertex { + HardwareVertex() = default; + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion); + + constexpr static VertexLayout GetVertexLayout(); + + Common::Vec4f position; + Common::Vec4f color; + Common::Vec2f tex_coord0; + Common::Vec2f tex_coord1; + Common::Vec2f tex_coord2; + float tex_coord0_w; + Common::Vec4f normquat; + Common::Vec3f view; + }; + + std::vector vertex_batch; + ImageAlloc default_texture; + vk::Sampler default_sampler; + + struct { + Pica::Shader::UniformData data{}; + std::array lighting_lut_dirty{}; + bool lighting_lut_dirty_any = true; + bool fog_lut_dirty = true; + bool proctex_noise_lut_dirty = true; + bool proctex_color_map_dirty = true; + bool proctex_alpha_map_dirty = true; + bool proctex_lut_dirty = true; + bool proctex_diff_lut_dirty = true; + bool dirty = true; + } uniform_block_data = {}; + + std::array hw_enabled_attributes{}; + + std::array texture_samplers; + SamplerInfo texture_cube_sampler; + std::unordered_map samplers; + std::unordered_map framebuffers; + + StreamBuffer vertex_buffer; + StreamBuffer uniform_buffer; + StreamBuffer index_buffer; + StreamBuffer texture_buffer; + StreamBuffer texture_lf_buffer; + PipelineInfo pipeline_info; + std::size_t uniform_buffer_alignment; + std::size_t uniform_size_aligned_vs; + std::size_t 
uniform_size_aligned_fs; + + std::array, Pica::LightingRegs::NumLightingSampler> + lighting_lut_data{}; + std::array fog_lut_data{}; + std::array proctex_noise_lut_data{}; + std::array proctex_color_map_data{}; + std::array proctex_alpha_map_data{}; + std::array proctex_lut_data{}; + std::array proctex_diff_lut_data{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 000000000..5f7a3a7b0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,196 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" + +namespace Vulkan { + +vk::Format ToVkFormatColor(u32 index) { + switch (index) { + case 0: return vk::Format::eR8G8B8A8Unorm; + case 1: return vk::Format::eR8G8B8Unorm; + case 2: return vk::Format::eR5G5B5A1UnormPack16; + case 3: return vk::Format::eR5G6B5UnormPack16; + case 4: return vk::Format::eR4G4B4A4UnormPack16; + default: return vk::Format::eUndefined; + } +} + +vk::Format ToVkFormatDepth(u32 index) { + switch (index) { + case 0: return vk::Format::eD16Unorm; + case 1: return vk::Format::eX8D24UnormPack32; + // Notice the similar gap in PixelFormat + case 3: return vk::Format::eD24UnormS8Uint; + default: return vk::Format::eUndefined; + } +} + +RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler) + : instance{instance}, scheduler{scheduler} { + // Pre-create all needed renderpasses by the renderer + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { + const vk::Format color_format = + 
instance.GetFormatAlternative(ToVkFormatColor(color)); + const vk::Format depth_stencil_format = + instance.GetFormatAlternative(ToVkFormatDepth(depth)); + + if (color_format == vk::Format::eUndefined && + depth_stencil_format == vk::Format::eUndefined) { + continue; + } + + cached_renderpasses[color][depth][0] = CreateRenderPass(color_format, depth_stencil_format, + vk::AttachmentLoadOp::eLoad, + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eColorAttachmentOptimal); + cached_renderpasses[color][depth][1] = CreateRenderPass(color_format, depth_stencil_format, + vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eColorAttachmentOptimal); + } + } +} + +RenderpassCache::~RenderpassCache() { + vk::Device device = instance.GetDevice(); + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { + if (vk::RenderPass load_pass = cached_renderpasses[color][depth][0]; load_pass) { + device.destroyRenderPass(load_pass); + } + + if (vk::RenderPass clear_pass = cached_renderpasses[color][depth][1]; clear_pass) { + device.destroyRenderPass(clear_pass); + } + } + } + + device.destroyRenderPass(present_renderpass); +} + +void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) { + if (active_renderpass == begin_info.renderPass) { + return; + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + if (active_renderpass) { + command_buffer.endRenderPass(); + } + + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + active_renderpass = begin_info.renderPass; +} + +void RenderpassCache::ExitRenderpass() { + if (!active_renderpass) { + return; + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.endRenderPass(); + active_renderpass = VK_NULL_HANDLE; +} + +void RenderpassCache::CreatePresentRenderpass(vk::Format format) { + if (!present_renderpass) { + 
present_renderpass = CreateRenderPass(format, vk::Format::eUndefined, + vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eUndefined, + vk::ImageLayout::ePresentSrcKHR); + } +} + +vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear) const { + const u32 color_index = + color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast(color); + const u32 depth_index = + depth == VideoCore::PixelFormat::Invalid ? MAX_DEPTH_FORMATS : (static_cast(depth) - 14); + + ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS); + return cached_renderpasses[color_index][depth_index][is_clear]; +} + +vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op, + vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const { + // Define attachments + u32 attachment_count = 0; + std::array attachments; + + bool use_color = false; + vk::AttachmentReference color_attachment_ref{}; + bool use_depth = false; + vk::AttachmentReference depth_attachment_ref{}; + + if (color != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = color, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = initial_layout, + .finalLayout = final_layout + }; + + color_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eColorAttachmentOptimal + }; + + use_color = true; + } + + if (depth != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = depth, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eLoad, + .stencilStoreOp = vk::AttachmentStoreOp::eStore, + .initialLayout = 
vk::ImageLayout::eDepthStencilAttachmentOptimal, + .finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + depth_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + use_depth = true; + } + + // We also require only one subpass + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = use_color ? 1u : 0u, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = 0, + .pDepthStencilAttachment = use_depth ? &depth_attachment_ref : nullptr + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = attachment_count, + .pAttachments = attachments.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr + }; + + // Create the renderpass + vk::Device device = instance.GetDevice(); + return device.createRenderPass(renderpass_info); +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 000000000..1db3aa94a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,55 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class TaskScheduler; + +constexpr u32 MAX_COLOR_FORMATS = 5; +constexpr u32 MAX_DEPTH_FORMATS = 4; + +class RenderpassCache { +public: + RenderpassCache(const Instance& instance, TaskScheduler& scheduler); + ~RenderpassCache(); + + /// Begins a new renderpass only when no other renderpass is currently active + void EnterRenderpass(const vk::RenderPassBeginInfo begin_info); + + /// Exits from any currently active renderpass instance + void ExitRenderpass(); + + /// Returns the renderpass associated with the color-depth format pair + [[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear) const; + + /// Returns the swapchain clear renderpass + [[nodiscard]] vk::RenderPass GetPresentRenderpass() const { + return present_renderpass; + } + + /// Creates the renderpass used when rendering to the swapchain + void CreatePresentRenderpass(vk::Format format); + +private: + /// Creates a renderpass configured appropriately and stores it in cached_renderpasses + vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op, + vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const; + +private: + const Instance& instance; + TaskScheduler& scheduler; + + vk::RenderPass active_renderpass = VK_NULL_HANDLE; + vk::RenderPass present_renderpass{}; + vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2]; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.cpp b/src/video_core/renderer_vulkan/vk_shader.cpp new file mode 100644 index 000000000..ce0038328 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.cpp @@ -0,0 +1,223 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include +#include +#include + +namespace Vulkan { + +constexpr TBuiltInResource DefaultTBuiltInResource = { + .maxLights = 32, + .maxClipPlanes = 6, + .maxTextureUnits = 32, + .maxTextureCoords = 32, + .maxVertexAttribs = 64, + .maxVertexUniformComponents = 4096, + .maxVaryingFloats = 64, + .maxVertexTextureImageUnits = 32, + .maxCombinedTextureImageUnits = 80, + .maxTextureImageUnits = 32, + .maxFragmentUniformComponents = 4096, + .maxDrawBuffers = 32, + .maxVertexUniformVectors = 128, + .maxVaryingVectors = 8, + .maxFragmentUniformVectors = 16, + .maxVertexOutputVectors = 16, + .maxFragmentInputVectors = 15, + .minProgramTexelOffset = -8, + .maxProgramTexelOffset = 7, + .maxClipDistances = 8, + .maxComputeWorkGroupCountX = 65535, + .maxComputeWorkGroupCountY = 65535, + .maxComputeWorkGroupCountZ = 65535, + .maxComputeWorkGroupSizeX = 1024, + .maxComputeWorkGroupSizeY = 1024, + .maxComputeWorkGroupSizeZ = 64, + .maxComputeUniformComponents = 1024, + .maxComputeTextureImageUnits = 16, + .maxComputeImageUniforms = 8, + .maxComputeAtomicCounters = 8, + .maxComputeAtomicCounterBuffers = 1, + .maxVaryingComponents = 60, + .maxVertexOutputComponents = 64, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxFragmentInputComponents = 128, + .maxImageUnits = 8, + .maxCombinedImageUnitsAndFragmentOutputs = 8, + .maxCombinedShaderOutputResources = 8, + .maxImageSamples = 0, + .maxVertexImageUniforms = 0, + .maxTessControlImageUniforms = 0, + .maxTessEvaluationImageUniforms = 0, + .maxGeometryImageUniforms = 0, + .maxFragmentImageUniforms = 8, + .maxCombinedImageUniforms = 8, + .maxGeometryTextureImageUnits = 16, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxGeometryUniformComponents = 1024, + .maxGeometryVaryingComponents = 64, + 
.maxTessControlInputComponents = 128, + .maxTessControlOutputComponents = 128, + .maxTessControlTextureImageUnits = 16, + .maxTessControlUniformComponents = 1024, + .maxTessControlTotalOutputComponents = 4096, + .maxTessEvaluationInputComponents = 128, + .maxTessEvaluationOutputComponents = 128, + .maxTessEvaluationTextureImageUnits = 16, + .maxTessEvaluationUniformComponents = 1024, + .maxTessPatchComponents = 120, + .maxPatchVertices = 32, + .maxTessGenLevel = 64, + .maxViewports = 16, + .maxVertexAtomicCounters = 0, + .maxTessControlAtomicCounters = 0, + .maxTessEvaluationAtomicCounters = 0, + .maxGeometryAtomicCounters = 0, + .maxFragmentAtomicCounters = 8, + .maxCombinedAtomicCounters = 8, + .maxAtomicCounterBindings = 1, + .maxVertexAtomicCounterBuffers = 0, + .maxTessControlAtomicCounterBuffers = 0, + .maxTessEvaluationAtomicCounterBuffers = 0, + .maxGeometryAtomicCounterBuffers = 0, + .maxFragmentAtomicCounterBuffers = 1, + .maxCombinedAtomicCounterBuffers = 1, + .maxAtomicCounterBufferSize = 16384, + .maxTransformFeedbackBuffers = 4, + .maxTransformFeedbackInterleavedComponents = 64, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .maxSamples = 4, + .maxMeshOutputVerticesNV = 256, + .maxMeshOutputPrimitivesNV = 512, + .maxMeshWorkGroupSizeX_NV = 32, + .maxMeshWorkGroupSizeY_NV = 1, + .maxMeshWorkGroupSizeZ_NV = 1, + .maxTaskWorkGroupSizeX_NV = 32, + .maxTaskWorkGroupSizeY_NV = 1, + .maxTaskWorkGroupSizeZ_NV = 1, + .maxMeshViewCountNV = 4, + .maxDualSourceDrawBuffersEXT = 1, + .limits = TLimits{ + .nonInductiveForLoops = 1, + .whileLoops = 1, + .doWhileLoops = 1, + .generalUniformIndexing = 1, + .generalAttributeMatrixVectorIndexing = 1, + .generalVaryingIndexing = 1, + .generalSamplerIndexing = 1, + .generalVariableIndexing = 1, + .generalConstantMatrixVectorIndexing = 1, + } +}; + +EShLanguage ToEshShaderStage(vk::ShaderStageFlagBits stage) { + switch (stage) { + case vk::ShaderStageFlagBits::eVertex: + return 
EShLanguage::EShLangVertex; + case vk::ShaderStageFlagBits::eGeometry: + return EShLanguage::EShLangGeometry; + case vk::ShaderStageFlagBits::eFragment: + return EShLanguage::EShLangFragment; + case vk::ShaderStageFlagBits::eCompute: + return EShLanguage::EShLangCompute; + default: + LOG_CRITICAL(Render_Vulkan, "Unkown shader stage"); + UNREACHABLE(); + } + + return EShLanguage::EShLangVertex; +} + +bool InitializeCompiler() { + static bool glslang_initialized = false; + + if (glslang_initialized) { + return true; + } + + if (!glslang::InitializeProcess()) { + LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler"); + return false; + } + + std::atexit([]() { glslang::FinalizeProcess(); }); + + glslang_initialized = true; + return true; +} + +vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device, + ShaderOptimization level) { + if (!InitializeCompiler()) { + return VK_NULL_HANDLE; + } + + EProfile profile = ECoreProfile; + EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); + EShLanguage lang = ToEshShaderStage(stage); + + int default_version = 450; + const char* pass_source_code = code.data(); + int pass_source_code_length = static_cast(code.size()); + + auto shader = std::make_unique(lang); + shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3); + shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1); + + glslang::TShader::ForbidIncluder includer; + if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) { + LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); + return VK_NULL_HANDLE; + } + + // Even though there's only a single shader, we still need to link it to generate SPV + auto program = std::make_unique(); + program->addShader(shader.get()); + if (!program->link(messages)) { + 
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); + return VK_NULL_HANDLE; + } + + glslang::TIntermediate* intermediate = program->getIntermediate(lang); + std::vector out_code; + spv::SpvBuildLogger logger; + glslang::SpvOptions options; + + // Compile the SPIR-V module without optimizations for easier debugging in RenderDoc. + if (level == ShaderOptimization::Debug) { + intermediate->addSourceText(pass_source_code, pass_source_code_length); + options.generateDebugInfo = true; + options.disableOptimizer = true; + options.optimizeSize = false; + options.disassemble = false; + options.validate = true; + } else { + options.disableOptimizer = false; + options.stripDebugInfo = true; + } + + glslang::GlslangToSpv(*intermediate, out_code, &logger, &options); + + const std::string spv_messages = logger.getAllMessages(); + if (!spv_messages.empty()) { + LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages); + } + + const vk::ShaderModuleCreateInfo shader_info = { + .codeSize = out_code.size() * sizeof(u32), + .pCode = out_code.data() + }; + + return device.createShaderModule(shader_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.h b/src/video_core/renderer_vulkan/vk_shader.h new file mode 100644 index 000000000..14d225d88 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader.h @@ -0,0 +1,19 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +enum class ShaderOptimization { + High = 0, + Debug = 1 +}; + +vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, + vk::Device device, ShaderOptimization level); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp new file mode 100644 index 000000000..2dc39f41a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -0,0 +1,1758 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/bit_set.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "video_core/pica_state.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/video_core.h" + +using Pica::FramebufferRegs; +using Pica::LightingRegs; +using Pica::RasterizerRegs; +using Pica::TexturingRegs; +using TevStageConfig = TexturingRegs::TevStageConfig; +using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; + +namespace Vulkan { + +constexpr std::string_view UniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 + +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; + +layout (set = 0, binding = 1, std140) uniform shader_data { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int 
shadow_texture_bias; + bool enable_clip1; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec4 clip_coef; +}; +)"; + +static std::string GetVertexInterfaceDeclaration(bool is_output) { + std::string out; + + const auto append_variable = [&](std::string_view var, int location) { + out += fmt::format("layout (location={}) ", location); + out += fmt::format("{}{};\n", is_output ? "out " : "in ", var); + }; + + append_variable("vec4 primary_color", ATTRIBUTE_COLOR); + append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0); + append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1); + append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2); + append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W); + append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); + append_variable("vec3 view", ATTRIBUTE_VIEW); + + if (is_output) { + // gl_PerVertex redeclaration is required for separate shader object + out += R"( +out gl_PerVertex { + vec4 gl_Position; + float gl_ClipDistance[2]; +}; +)"; + } + + return out; +} + +PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { + PicaFSConfig res{}; + + auto& state = res.state; + + state.scissor_test_mode = regs.rasterizer.scissor_test.mode; + + state.depthmap_enable = regs.rasterizer.depthmap_enable; + + state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable + ? regs.framebuffer.output_merger.alpha_test.func.Value() + : FramebufferRegs::CompareFunc::Always; + + state.texture0_type = regs.texturing.texture0.type; + + state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; + + state.alphablend_enable = {}; + state.logic_op = {}; + + // Copy relevant tev stages fields. 
+ // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. + const auto& tev_stages = regs.texturing.GetTevStages(); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); + for (std::size_t i = 0; i < tev_stages.size(); i++) { + const auto& tev_stage = tev_stages[i]; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; + } + + state.fog_mode = regs.texturing.fog_mode; + state.fog_flip = regs.texturing.fog_flip != 0; + + state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() + << 4; + + // Fragment lighting + + state.lighting.enable = !regs.lighting.disable; + state.lighting.src_num = regs.lighting.max_light_index + 1; + + for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { + unsigned num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + state.lighting.light[light_index].num = num; + state.lighting.light[light_index].directional = light.config.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; + state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; + state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; + state.lighting.light[light_index].dist_atten_enable = + !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].spot_atten_enable = + !regs.lighting.IsSpotAttenDisabled(num); + state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num); + } + + state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; + state.lighting.lut_d0.abs_input = 
regs.lighting.abs_lut_input.disable_d0 == 0; + state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; + state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + // this is a dummy field due to lack of the corresponding register + state.lighting.lut_sp.enable = true; + state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; + state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); + state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); + + state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; + state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; + state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; + state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; + state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + 
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + state.lighting.config = regs.lighting.config0.config; + state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; + state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; + state.lighting.bump_mode = regs.lighting.config0.bump_mode; + state.lighting.bump_selector = regs.lighting.config0.bump_selector; + state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; + + state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; + state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; + state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; + state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; + state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; + state.lighting.shadow_selector = regs.lighting.config0.shadow_selector; + + state.proctex.enable = regs.texturing.main_config.texture3_enable; + if (state.proctex.enable) { + state.proctex.coord = regs.texturing.main_config.texture3_coordinates; + state.proctex.u_clamp = regs.texturing.proctex.u_clamp; + state.proctex.v_clamp = regs.texturing.proctex.v_clamp; + state.proctex.color_combiner = regs.texturing.proctex.color_combiner; + state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; + state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; + state.proctex.noise_enable = regs.texturing.proctex.noise_enable; + state.proctex.u_shift = regs.texturing.proctex.u_shift; + state.proctex.v_shift = regs.texturing.proctex.v_shift; + state.proctex.lut_width = regs.texturing.proctex_lut.width; + state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; + state.proctex.lut_offset1 = 
regs.texturing.proctex_lut_offset.level1; + state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; + state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; + state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; + state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; + state.proctex.lut_filter = regs.texturing.proctex_lut.filter; + } + + state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + FramebufferRegs::FragmentOperationMode::Shadow; + + state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; + + return res; +} + +void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + swizzle_hash = setup.GetSwizzleDataHash(); + main_offset = regs.main_offset; + sanitize_mul = VideoCore::g_hw_shader_accurate_mul; + + num_outputs = 0; + output_map.fill(16); + + for (int reg : Common::BitSet(regs.output_mask)) { + output_map[reg] = num_outputs++; + } +} + +void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { + vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); + gs_output_attributes = vs_output_attributes; + + semantic_maps.fill({16, 0}); + for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { + const std::array semantics{ + regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), + }; + for (u32 comp = 0; comp < 4; ++comp) { + const auto semantic = semantics[comp]; + if (static_cast(semantic) < 24) { + semantic_maps[static_cast(semantic)] = {attrib, comp}; + } else if (semantic != VSOutputAttributes::INVALID) { + LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); + } + } + } +} + +/// Detects if a TEV stage is configured to be skipped (to avoid 
generating unnecessary code) +static bool IsPassThroughTevStage(const TevStageConfig& stage) { + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); +} + +static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { + const auto& state = config.state; + switch (texture_unit) { + case 0: + // Only unit 0 respects the texturing type + switch (state.texture0_type) { + case TexturingRegs::TextureConfig::Texture2D: + return "textureLod(sampler2D(tex0, tex0_sampler), texcoord0, getLod(texcoord0 * vec2(textureSize(sampler2D(tex0, tex0_sampler), 0))))"; + case TexturingRegs::TextureConfig::Projection2D: + // TODO (wwylele): find the exact LOD formula for projection texture + return "textureProj(sampler2D(tex0, tex0_sampler), vec3(texcoord0, texcoord0_w))"; + case TexturingRegs::TextureConfig::TextureCube: + return "texture(samplerCube(tex_cube, tex_cube_sampler), vec3(texcoord0, texcoord0_w))"; + case TexturingRegs::TextureConfig::Shadow2D: + return "shadowTexture(texcoord0, texcoord0_w)"; + case TexturingRegs::TextureConfig::ShadowCube: + return "shadowTextureCube(texcoord0, texcoord0_w)"; + case TexturingRegs::TextureConfig::Disabled: + return "vec4(0.0)"; + default: + LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); + UNIMPLEMENTED(); + return "texture(sampler2D(tex0, tex0_sampler), texcoord0)"; + } + case 1: + return "textureLod(sampler2D(tex1, tex1_sampler), texcoord1, getLod(texcoord1 * vec2(textureSize(sampler2D(tex1, tex1_sampler), 0))))"; + case 2: + if (state.texture2_use_coord1) + return 
"textureLod(sampler2D(tex2, tex2_sampler), texcoord1, getLod(texcoord1 * vec2(textureSize(sampler2D(tex2, tex2_sampler), 0))))"; + else + return "textureLod(sampler2D(tex2, tex2_sampler), texcoord2, getLod(texcoord2 * vec2(textureSize(sampler2D(tex2, tex2_sampler), 0))))"; + case 3: + if (state.proctex.enable) { + return "ProcTex()"; + } else { + LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); + return "vec4(0.0)"; + } + default: + UNREACHABLE(); + return ""; + } +} + +/// Writes the specified TEV stage source component(s) +static void AppendSource(std::string& out, const PicaFSConfig& config, + TevStageConfig::Source source, std::string_view index_name) { + using Source = TevStageConfig::Source; + switch (source) { + case Source::PrimaryColor: + out += "rounded_primary_color"; + break; + case Source::PrimaryFragmentColor: + out += "primary_fragment_color"; + break; + case Source::SecondaryFragmentColor: + out += "secondary_fragment_color"; + break; + case Source::Texture0: + out += SampleTexture(config, 0); + break; + case Source::Texture1: + out += SampleTexture(config, 1); + break; + case Source::Texture2: + out += SampleTexture(config, 2); + break; + case Source::Texture3: + out += SampleTexture(config, 3); + break; + case Source::PreviousBuffer: + out += "combiner_buffer"; + break; + case Source::Constant: + out += "const_color["; + out += index_name; + out += ']'; + break; + case Source::Previous: + out += "last_tex_env_out"; + break; + default: + out += "vec4(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source); + break; + } +} + +/// Writes the color components to use for the specified TEV stage color modifier +static void AppendColorModifier(std::string& out, const PicaFSConfig& config, + TevStageConfig::ColorModifier modifier, + TevStageConfig::Source source, std::string_view index_name) { + using ColorModifier = TevStageConfig::ColorModifier; + switch (modifier) { + case ColorModifier::SourceColor: + AppendSource(out, 
config, source, index_name); + out += ".rgb"; + break; + case ColorModifier::OneMinusSourceColor: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".rgb"; + break; + case ColorModifier::SourceAlpha: + AppendSource(out, config, source, index_name); + out += ".aaa"; + break; + case ColorModifier::OneMinusSourceAlpha: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".aaa"; + break; + case ColorModifier::SourceRed: + AppendSource(out, config, source, index_name); + out += ".rrr"; + break; + case ColorModifier::OneMinusSourceRed: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".rrr"; + break; + case ColorModifier::SourceGreen: + AppendSource(out, config, source, index_name); + out += ".ggg"; + break; + case ColorModifier::OneMinusSourceGreen: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".ggg"; + break; + case ColorModifier::SourceBlue: + AppendSource(out, config, source, index_name); + out += ".bbb"; + break; + case ColorModifier::OneMinusSourceBlue: + out += "vec3(1.0) - "; + AppendSource(out, config, source, index_name); + out += ".bbb"; + break; + default: + out += "vec3(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier); + break; + } +} + +/// Writes the alpha component to use for the specified TEV stage alpha modifier +static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, + TevStageConfig::AlphaModifier modifier, + TevStageConfig::Source source, const std::string& index_name) { + using AlphaModifier = TevStageConfig::AlphaModifier; + switch (modifier) { + case AlphaModifier::SourceAlpha: + AppendSource(out, config, source, index_name); + out += ".a"; + break; + case AlphaModifier::OneMinusSourceAlpha: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".a"; + break; + case AlphaModifier::SourceRed: + AppendSource(out, config, source, index_name); 
+ out += ".r"; + break; + case AlphaModifier::OneMinusSourceRed: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".r"; + break; + case AlphaModifier::SourceGreen: + AppendSource(out, config, source, index_name); + out += ".g"; + break; + case AlphaModifier::OneMinusSourceGreen: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".g"; + break; + case AlphaModifier::SourceBlue: + AppendSource(out, config, source, index_name); + out += ".b"; + break; + case AlphaModifier::OneMinusSourceBlue: + out += "1.0 - "; + AppendSource(out, config, source, index_name); + out += ".b"; + break; + default: + out += "0.0"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier); + break; + } +} + +/// Writes the combiner function for the color components for the specified TEV stage operation +static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, + std::string_view variable_name) { + out += "clamp("; + using Operation = TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + out += fmt::format("{}[0]", variable_name); + break; + case Operation::Modulate: + out += fmt::format("{0}[0] * {0}[1]", variable_name); + break; + case Operation::Add: + out += fmt::format("{0}[0] + {0}[1]", variable_name); + break; + case Operation::AddSigned: + out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name); + break; + case Operation::Lerp: + out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name); + break; + case Operation::Subtract: + out += fmt::format("{0}[0] - {0}[1]", variable_name); + break; + case Operation::MultiplyThenAdd: + out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); + break; + case Operation::AddThenMultiply: + out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name); + break; + case Operation::Dot3_RGB: + case Operation::Dot3_RGBA: + out += + fmt::format("vec3(dot({0}[0] - vec3(0.5), 
{0}[1] - vec3(0.5)) * 4.0)", variable_name); + break; + default: + out += "vec3(0.0)"; + LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation); + break; + } + out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0 +} + +/// Writes the combiner function for the alpha component for the specified TEV stage operation +static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, + std::string_view variable_name) { + out += "clamp("; + using Operation = TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + out += fmt::format("{}[0]", variable_name); + break; + case Operation::Modulate: + out += fmt::format("{0}[0] * {0}[1]", variable_name); + break; + case Operation::Add: + out += fmt::format("{0}[0] + {0}[1]", variable_name); + break; + case Operation::AddSigned: + out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name); + break; + case Operation::Lerp: + out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name); + break; + case Operation::Subtract: + out += fmt::format("{0}[0] - {0}[1]", variable_name); + break; + case Operation::MultiplyThenAdd: + out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); + break; + case Operation::AddThenMultiply: + out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name); + break; + default: + out += "0.0"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation); + break; + } + out += ", 0.0, 1.0)"; +} + +/// Writes the if-statement condition used to evaluate alpha testing +static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { + using CompareFunc = FramebufferRegs::CompareFunc; + switch (func) { + case CompareFunc::Never: + out += "true"; + break; + case CompareFunc::Always: + out += "false"; + break; + case CompareFunc::Equal: + case CompareFunc::NotEqual: + case CompareFunc::LessThan: + case CompareFunc::LessThanOrEqual: + case 
CompareFunc::GreaterThan: + case CompareFunc::GreaterThanOrEqual: { + static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; + const auto index = static_cast(func) - static_cast(CompareFunc::Equal); + out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); + break; + } + + default: + out += "false"; + LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func); + break; + } +} + +/// Writes the code to emulate the specified TEV stage +static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { + const auto stage = + static_cast(config.state.tev_stages[index]); + if (!IsPassThroughTevStage(stage)) { + const std::string index_name = std::to_string(index); + + out += fmt::format("vec3 color_results_{}_1 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); + out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); + out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name); + AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); + out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, " + "color_results_{}_2, color_results_{}_3);\n", + index_name, index_name, index_name, index_name); + + // Round the output of each TEV stage to maintain the PICA's 8 bits of precision + out += fmt::format("vec3 color_output_{} = byteround(", index_name); + AppendColorCombiner(out, stage.color_op, "color_results_" + index_name); + out += ");\n"; + + if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name); + } else { + out += fmt::format("float alpha_results_{}[3] = float[3](", index_name); + AppendAlphaModifier(out, config, 
stage.alpha_modifier1, stage.alpha_source1, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, + index_name); + out += ", "; + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, + index_name); + out += ");\n"; + + out += fmt::format("float alpha_output_{} = byteround(", index_name); + AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); + out += ");\n"; + } + + out += fmt::format("last_tex_env_out = vec4(" + "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " + "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", + index_name, stage.GetColorMultiplier(), index_name, + stage.GetAlphaMultiplier()); + } + + out += "combiner_buffer = next_combiner_buffer;\n"; + + if (config.TevStageUpdatesCombinerBufferColor(index)) + out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n"; + + if (config.TevStageUpdatesCombinerBufferAlpha(index)) + out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; +} + +/// Writes the code to emulate fragment lighting +static void WriteLighting(std::string& out, const PicaFSConfig& config) { + const auto& lighting = config.state.lighting; + + // Define lighting globals + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 light_vector = vec3(0.0);\n" + "vec3 refl_value = vec3(0.0);\n" + "vec3 spot_dir = vec3(0.0);\n" + "vec3 half_vector = vec3(0.0);\n" + "float dot_product = 0.0;\n" + "float clamp_highlights = 1.0;\n" + "float geo_factor = 1.0;\n"; + + // Compute fragment normals and tangents + const auto Perturbation = [&] { + return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); + }; + if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { + // Bump mapping is enabled using a normal map + out += fmt::format("vec3 surface_normal = {};\n", Perturbation()); + + // Recompute Z-component of perturbation if 'renorm' is enabled, 
this provides a higher + // precision result + if (lighting.bump_renorm) { + constexpr std::string_view val = + "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); + } + + // The tangent vector is not perturbed by the normal map and is just a unit vector. + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { + // Bump mapping is enabled using a tangent map + out += fmt::format("vec3 surface_tangent = {};\n", Perturbation()); + // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant + // computation below, which is also confirmed on 3DS. So we don't bother recomputing here + // even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + } else { + // No bump mapping - surface local normal and tangent are just unit vectors + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" + "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to + // eyespace. 
+ out += "vec4 normalized_normquat = normalize(normquat);\n" + "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" + "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; + + if (lighting.enable_shadow) { + std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); + if (lighting.shadow_invert) { + out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); + } else { + out += fmt::format("vec4 shadow = {};\n", shadow_texture); + } + } else { + out += "vec4 shadow = vec4(1.0);\n"; + } + + // Samples the specified lookup table for specular lighting + auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, + LightingRegs::LightingLutInput input, bool abs) { + std::string index; + switch (input) { + case LightingRegs::LightingLutInput::NH: + index = "dot(normal, normalize(half_vector))"; + break; + + case LightingRegs::LightingLutInput::VH: + index = "dot(normalize(view), normalize(half_vector))"; + break; + + case LightingRegs::LightingLutInput::NV: + index = "dot(normal, normalize(view))"; + break; + + case LightingRegs::LightingLutInput::LN: + index = "dot(light_vector, normal)"; + break; + + case LightingRegs::LightingLutInput::SP: + index = "dot(light_vector, spot_dir)"; + break; + + case LightingRegs::LightingLutInput::CP: + // CP input is only available with configuration 7 + if (lighting.config == LightingRegs::LightingConfig::Config7) { + // Note: even if the normal vector is modified by normal map, which is not the + // normal of the tangent plane anymore, the half angle vector is still projected + // using the modified normal vector. + constexpr std::string_view half_angle_proj = + "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; + // Note: the half angle vector projection is confirmed not normalized before the dot + // product. The result is in fact not cos(phi) as the name suggested. 
+ index = fmt::format("dot({}, tangent)", half_angle_proj); + } else { + index = "0.0"; + } + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input); + UNIMPLEMENTED(); + index = "0.0"; + break; + } + + const auto sampler_index = static_cast(sampler); + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = lighting.light[light_num].two_sided_diffuse + ? fmt::format("abs({})", index) + : fmt::format("max({}, 0.0)", index); + return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); + } else { + // LUT index is in the range of (-1.0, 1.0) + return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); + } + }; + + // Write the code to emulate each enabled light + for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.light[light_index]; + const std::string light_src = fmt::format("light_src[{}]", light_config.num); + + // Compute light vector (directional or positional) + if (light_config.directional) { + out += fmt::format("light_vector = normalize({}.position);\n", light_src); + } else { + out += fmt::format("light_vector = normalize({}.position + view);\n", light_src); + } + + out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); + out += "half_vector = normalize(view) + light_vector;\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or + // two-sided + out += std::string("dot_product = ") + (light_config.two_sided_diffuse + ? 
"abs(dot(light_vector, normal));\n" + : "max(dot(light_vector, normal), 0.0);\n"); + + // If enabled, clamp specular component if lighting result is zero + if (lighting.clamp_highlights) { + out += "clamp_highlights = sign(dot_product);\n"; + } + + // If enabled, compute spot light attenuation value + std::string spot_atten = "1.0"; + if (light_config.spot_atten_enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + const std::string value = + GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), + light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); + spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); + } + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enable) { + const std::string index = fmt::format("clamp({}.dist_atten_scale * length(-view - " + "{}.position) + {}.dist_atten_bias, 0.0, 1.0)", + light_src, light_src, light_src); + const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); + dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); + } + + if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { + out += "geo_factor = dot(half_vector, half_vector);\n" + "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" + "dot_product / geo_factor, 1.0);\n"; + } + + // Specular 0 component + std::string d0_lut_value = "1.0"; + if (lighting.lut_d0.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value + const std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, + lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); + } + std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); + if (light_config.geometric_factor_0) { + specular_0 = fmt::format("({} * geo_factor)", specular_0); + } + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.lut_rr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectRed)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, + lighting.lut_rr.type, lighting.lut_rr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); + out += fmt::format("refl_value.r = {};\n", value); + } else { + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.lut_rg.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectGreen)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, + lighting.lut_rg.type, lighting.lut_rg.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); + out += fmt::format("refl_value.g = {};\n", value); + } else { + out += "refl_value.g = refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.lut_rb.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + 
LightingRegs::LightingSampler::ReflectBlue)) { + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, + lighting.lut_rb.type, lighting.lut_rb.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); + out += fmt::format("refl_value.b = {};\n", value); + } else { + out += "refl_value.b = refl_value.r;\n"; + } + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (lighting.lut_d1.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + const std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, + lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); + } + std::string specular_1 = + fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); + if (light_config.geometric_factor_1) { + specular_1 = fmt::format("({} * geo_factor)", specular_1); + } + + // Fresnel + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string value = + GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, + lighting.lut_fr.type, lighting.lut_fr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); + + // Enabled for diffuse lighting alpha component + if (lighting.enable_primary_alpha) { + out += fmt::format("diffuse_sum.a = {};\n", value); + } + + // Enabled for the specular lighting alpha component + if (lighting.enable_secondary_alpha) { + out += fmt::format("specular_sum.a = {};\n", value); + } + } + + bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable; + bool 
shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable; + std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; + std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; + + // Compute primary fragment color (diffuse lighting) function + out += fmt::format( + "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", + light_src, light_src, dist_atten, spot_atten, shadow_primary); + + // Compute secondary fragment color (specular lighting) function + out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", + specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); + } + + // Apply shadow attenuation to alpha components if enabled + if (lighting.shadow_alpha) { + if (lighting.enable_primary_alpha) { + out += "diffuse_sum.a *= shadow.a;\n"; + } + if (lighting.enable_secondary_alpha) { + out += "specular_sum.a *= shadow.a;\n"; + } + } + + // Sum final lighting result + out += "diffuse_sum.rgb += lighting_global_ambient;\n" + "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" + "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; +} + +using ProcTexClamp = TexturingRegs::ProcTexClamp; +using ProcTexShift = TexturingRegs::ProcTexShift; +using ProcTexCombiner = TexturingRegs::ProcTexCombiner; +using ProcTexFilter = TexturingRegs::ProcTexFilter; + +static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, + ProcTexClamp clamp_mode) { + const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 
"1.0" : "0.5"; + switch (mode) { + case ProcTexShift::None: + out += "0.0"; + break; + case ProcTexShift::Odd: + out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); + break; + case ProcTexShift::Even: + out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); + out += "0.0"; + break; + } +} + +static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { + switch (mode) { + case ProcTexClamp::ToZero: + out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); + break; + case ProcTexClamp::ToEdge: + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + case ProcTexClamp::SymmetricalRepeat: + out += fmt::format("{0} = fract({0});\n", var); + break; + case ProcTexClamp::MirroredRepeat: { + out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); + break; + } + case ProcTexClamp::Pulse: + out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + } +} + +static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, + std::string_view offset) { + const auto combined = [combiner]() -> std::string_view { + switch (combiner) { + case ProcTexCombiner::U: + return "u"; + case ProcTexCombiner::U2: + return "(u * u)"; + case TexturingRegs::ProcTexCombiner::V: + return "v"; + case TexturingRegs::ProcTexCombiner::V2: + return "(v * v)"; + case TexturingRegs::ProcTexCombiner::Add: + return "((u + v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::Add2: + return "((u * u + v * v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::SqrtAdd2: + return "min(sqrt(u * u + v * v), 1.0)"; + case TexturingRegs::ProcTexCombiner::Min: + return "min(u, v)"; + case TexturingRegs::ProcTexCombiner::Max: + return "max(u, v)"; + case TexturingRegs::ProcTexCombiner::RMax: + return 
"min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); + return "0.0"; + } + }(); + + out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); +} + +static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { + // LUT sampling uitlity + // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and + // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using + // value entries and difference entries. + out += R"( +float ProcTexLookupLUT(int offset, float coord) { + coord *= 128.0; + float index_i = clamp(floor(coord), 0.0, 127.0); + float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be + // extracted as index_i = 127.0 and index_f = 1.0 + vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; + return clamp(entry.r + entry.g * index_f, 0.0, 1.0); +} + )"; + + // Noise utility + if (config.state.proctex.noise_enable) { + // See swrasterizer/proctex.cpp for more information about these functions + out += R"( +int ProcTexNoiseRand1D(int v) { + const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); + return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; +} + +float ProcTexNoiseRand2D(vec2 point) { + const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); + int u2 = ProcTexNoiseRand1D(int(point.x)); + int v2 = ProcTexNoiseRand1D(int(point.y)); + v2 += ((u2 & 3) == 1) ? 
4 : 0; + v2 ^= (u2 & 1) * 6; + v2 += 10 + u2; + v2 &= 0xF; + v2 ^= table[u2]; + return -1.0 + float(v2) * 2.0/ 15.0; +} + +float ProcTexNoiseCoef(vec2 x) { + vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); + vec2 point = floor(grid); + vec2 frac = grid - point; + + float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); + float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); + float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); + float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); + + float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); + float x0 = mix(g0, g1, x_noise); + float x1 = mix(g2, g3, x_noise); + return mix(x0, x1, y_noise); +} + )"; + } + + out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; + out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); + // Offsets for level 4-7 seem to be hardcoded + out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", + config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, + config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); + out += "int lut_offset = lut_offsets[level];\n"; + // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] + out += "lut_coord *= float(lut_width - 1);\n"; + + switch (config.state.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::LinearMipmapLinear: + case ProcTexFilter::LinearMipmapNearest: + out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; + out += "float lut_index_f = fract(lut_coord);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " + "proctex_lut_offset) + " + "lut_index_f * " + "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; + break; + case ProcTexFilter::Nearest: + case 
ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::NearestMipmapNearest: + out += "lut_coord += float(lut_offset);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; + break; + } + + out += "}\n"; + + out += "vec4 ProcTex() {\n"; + if (config.state.proctex.coord < 3) { + out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord); + } else { + LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); + out += "vec2 uv = abs(texcoord0);\n"; + } + + // This LOD formula is the same as the LOD upper limit defined in OpenGL. + // f(x, y) <= m_u + m_v + m_w + // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) + // Note: this is different from the one normal 2D textures use. + out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; + // unlike normal texture, the bias is inside the log2 + out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", + config.state.proctex.lut_width); + out += "if (proctex_bias == 0.0) lod = 0.0;\n"; + out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", + std::max(0.0f, static_cast(config.state.proctex.lod_min)), + std::min(7.0f, static_cast(config.state.proctex.lod_max))); + // Get shift offset before noise generation + out += "float u_shift = "; + AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, + config.state.proctex.u_clamp); + out += ";\n"; + out += "float v_shift = "; + AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, + config.state.proctex.v_clamp); + out += ";\n"; + + // Generate noise + if (config.state.proctex.noise_enable) { + out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" + "uv = abs(uv);\n"; + } + + // Shift + out += "float u = uv.x + u_shift;\n" + "float v = uv.y + v_shift;\n"; + + // Clamp + AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); + AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); + + // Combine and map + out 
+= "float lut_coord = "; + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, + "proctex_color_map_offset"); + out += ";\n"; + + switch (config.state.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::Nearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; + break; + case ProcTexFilter::NearestMipmapNearest: + case ProcTexFilter::LinearMipmapNearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; + break; + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::LinearMipmapLinear: + out += "int lod_i = int(lod);\n" + "float lod_f = fract(lod);\n" + "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " + "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; + break; + } + + if (config.state.proctex.separate_alpha) { + // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It + // uses the output of CombineAndMap directly instead. + out += "float final_alpha = "; + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, + "proctex_alpha_map_offset"); + out += ";\n"; + out += "return vec4(final_color.xyz, final_alpha);\n}\n"; + } else { + out += "return final_color;\n}\n"; + } +} + +std::string GenerateFragmentShader(const PicaFSConfig& config) { + const auto& state = config.state; + std::string out = "#version 450 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; + out += GetVertexInterfaceDeclaration(false); + + out += R"( +in vec4 gl_FragCoord; + +layout (location = 0) out vec4 color; + +layout(set = 0, binding = 2) uniform samplerBuffer texture_buffer_lut_lf; +layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_rg; +layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rgba; + +layout(set = 1, binding = 0) uniform texture2D tex0; +layout(set = 1, binding = 1) uniform texture2D tex1; +layout(set = 1, binding = 2) uniform texture2D tex2; +layout(set = 1, 
binding = 3) uniform textureCube tex_cube; + +layout(set = 2, binding = 0) uniform sampler tex0_sampler; +layout(set = 2, binding = 1) uniform sampler tex1_sampler; +layout(set = 2, binding = 2) uniform sampler tex2_sampler; +layout(set = 2, binding = 3) uniform sampler tex_cube_sampler; + +layout(set = 3, binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; +layout(set = 3, binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; +layout(set = 3, binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; +layout(set = 3, binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; +layout(set = 3, binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; +layout(set = 3, binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; +layout(set = 3, binding = 6, r32ui) uniform uimage2D shadow_buffer; +)"; + + out += UniformBlockDef; + + out += R"( +// Rotate the vector v by the quaternion q +vec3 quaternion_rotate(vec4 q, vec3 v) { + return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); +} + +float LookupLightingLUT(int lut_index, int index, float delta) { + vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; + return entry.r + entry.g * delta; +} + +float LookupLightingLUTUnsigned(int lut_index, float pos) { + int index = clamp(int(pos * 256.0), 0, 255); + float delta = pos * 256.0 - float(index); + return LookupLightingLUT(lut_index, index, delta); +} + +float LookupLightingLUTSigned(int lut_index, float pos) { + int index = clamp(int(pos * 128.0), -128, 127); + float delta = pos * 128.0 - float(index); + if (index < 0) index += 256; + return LookupLightingLUT(lut_index, index, delta); +} + +float byteround(float x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec2 byteround(vec2 x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec3 byteround(vec3 x) { + return round(x * 255.0) * (1.0 / 255.0); +} + +vec4 byteround(vec4 x) { + return round(x * 255.0) 
* (1.0 / 255.0); +} + +// PICA's LOD formula for 2D textures. +// This LOD formula is the same as the LOD lower limit defined in OpenGL. +// f(x, y) >= max{m_u, m_v, m_w} +// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) +float getLod(vec2 coord) { + vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord))); + return log2(max(d.x, d.y)); +} + +uvec2 DecodeShadow(uint pixel) { + return uvec2(pixel >> 8, pixel & 0xFFu); +} + +uint EncodeShadow(uvec2 pixel) { + return (pixel.x << 8) | pixel.y; +} + +float CompareShadow(uint pixel, uint z) { + uvec2 p = DecodeShadow(pixel); + return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); +} + +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + return 1.0; + return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); +} + +float mix2(vec4 s, vec2 a) { + vec2 t = mix(s.xy, s.zw, a.yy); + return mix(t.x, t.y, a.x); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.state.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; + out += R"( + vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} + +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = imageSize(shadow_texture_px); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } +)"; + out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - 
shadow_texture_bias));"; + out += R"( + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + // This part should have been refactored into functions, + // but many drivers don't like passing uimage2D as parameters + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_px, i00).r, + imageLoad(shadow_texture_px, i10).r, + imageLoad(shadow_texture_px, i01).r, + imageLoad(shadow_texture_px, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nx, i00).r, + imageLoad(shadow_texture_nx, i10).r, + imageLoad(shadow_texture_nx, i01).r, + imageLoad(shadow_texture_nx, i11).r); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_py, i00).r, + imageLoad(shadow_texture_py, i10).r, + imageLoad(shadow_texture_py, i01).r, + imageLoad(shadow_texture_py, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_ny, i00).r, + imageLoad(shadow_texture_ny, i10).r, + imageLoad(shadow_texture_ny, i01).r, + imageLoad(shadow_texture_ny, i11).r); + } else { + if (c.z > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_pz, i00).r, + imageLoad(shadow_texture_pz, i10).r, + imageLoad(shadow_texture_pz, i01).r, + imageLoad(shadow_texture_pz, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nz, i00).r, + imageLoad(shadow_texture_nz, i10).r, + imageLoad(shadow_texture_nz, i01).r, + imageLoad(shadow_texture_nz, i11).r); + } + vec4 s = vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} +)"; 
+ + if (config.state.proctex.enable) + AppendProcTexSampler(out, config); + + // We round the interpolated primary color to the nearest 1/255th + // This maintains the PICA's 8 bits of precision + out += R"( +void main() { +vec4 rounded_primary_color = byteround(primary_color); +vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); +)"; + + // Do not do any sort of processing if it's obvious we're not going to pass the alpha test + if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { + out += "discard; }"; + return out; + } + + // Append the scissor test + if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) { + out += "if ("; + // Negate the condition if we have to keep only the pixels outside the scissor box + if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { + out += '!'; + } + out += "(gl_FragCoord.x >= float(scissor_x1) && " + "gl_FragCoord.y >= float(scissor_y1) && " + "gl_FragCoord.x < float(scissor_x2) && " + "gl_FragCoord.y < float(scissor_y2))) discard;\n"; + } + + // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use + // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then + // do our own transformation according to PICA specification. 
+ out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" + "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + + if (state.lighting.enable) + WriteLighting(out, config); + + out += "vec4 combiner_buffer = vec4(0.0);\n" + "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" + "vec4 last_tex_env_out = vec4(0.0);\n"; + + for (std::size_t index = 0; index < state.tev_stages.size(); ++index) { + WriteTevStage(out, config, static_cast(index)); + } + + if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { + out += "if ("; + AppendAlphaTestCondition(out, state.alpha_test_func); + out += ") discard;\n"; + } + + // Append fog combiner + if (state.fog_mode == TexturingRegs::FogMode::Fog) { + // Get index into fog LUT + if (state.fog_flip) { + out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; + } else { + out += "float fog_index = depth * 128.0;\n"; + } + + // Generate clamped fog factor from LUT for given fog index + out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" + "float fog_f = fog_index - fog_i;\n" + "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " + "fog_lut_offset).rg;\n" + "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" + "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; + + // Blend the fog + out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; + } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { + Core::System::GetInstance().TelemetrySession().AddField( + Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); + LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); + out += "discard; }"; + return out; + } + + if (state.shadow_rendering) { + out += R"( +uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); +uint s = uint(last_tex_env_out.g * float(0xFF)); +ivec2 image_coord = 
ivec2(gl_FragCoord.xy);

uint old = imageLoad(shadow_buffer, image_coord).x;
uint new;
uint old2;
do {
    old2 = old;

    uvec2 ref = DecodeShadow(old);
    if (d < ref.x) {
        if (s == 0u) {
            ref.x = d;
        } else {
            s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
            ref.y = min(s, ref.y);
        }
    }
    new = EncodeShadow(ref);

} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2);
)";
        // The loop above retries the shadow-buffer update with imageAtomicCompSwap until
        // no other invocation modified the texel between the load and the swap.
    } else {
        // Normal (non-shadow) rendering: write depth and the final combiner output.
        out += "gl_FragDepth = depth;\n";
        // Round the final fragment color to maintain the PICA's 8 bits of precision
        out += "color = byteround(last_tex_env_out);\n";
    }

    out += '}';
    return out;
}

/**
 * Generates a vertex shader that forwards pre-transformed vertices (produced by the
 * software/CPU vertex pipeline) straight through to the fragment-shader interface:
 * each vert_* input attribute is copied to the matching interface output unchanged,
 * and the two PICA clip planes are applied via gl_ClipDistance.
 * @returns String of the GLSL source code
 */
std::string GenerateTrivialVertexShader() {
    std::string out = "#version 450 core\n"
                      "#extension GL_ARB_separate_shader_objects : enable\n\n";
    // Attribute locations come from the Attributes enum so the pipeline vertex layout
    // and this shader stay in sync.
    out +=
        fmt::format("layout(location = {}) in vec4 vert_position;\n"
                    "layout(location = {}) in vec4 vert_color;\n"
                    "layout(location = {}) in vec2 vert_texcoord0;\n"
                    "layout(location = {}) in vec2 vert_texcoord1;\n"
                    "layout(location = {}) in vec2 vert_texcoord2;\n"
                    "layout(location = {}) in float vert_texcoord0_w;\n"
                    "layout(location = {}) in vec4 vert_normquat;\n"
                    "layout(location = {}) in vec3 vert_view;\n",
                    ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1,
                    ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW);

    out += GetVertexInterfaceDeclaration(true);

    out += UniformBlockDef;

    out += R"(

void main() {
    primary_color = vert_color;
    texcoord0 = vert_texcoord0;
    texcoord1 = vert_texcoord1;
    texcoord2 = vert_texcoord2;
    texcoord0_w = vert_texcoord0_w;
    normquat = vert_normquat;
    view = vert_view;
    gl_Position = vert_position;

    gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
    if (enable_clip1) {
        gl_ClipDistance[1] = dot(clip_coef, vert_position);
    } else {
        gl_ClipDistance[1] = 0;
    }
}
)";

    return out;
}

// NOTE(review): the return type was rendered as "std::optional" without a template
// argument by the extraction that produced this diff; it presumably is
// std::optional<std::string> (the function returns std::nullopt below) — TODO confirm
// against the header declaration.
/**
 * Generates the vertex shader for the given decompiled PICA VS program.
 * Currently a stub for the Vulkan backend: the previous OpenGL implementation is kept
 * below as a commented-out reference, and the function always returns std::nullopt so
 * callers fall back to the software vertex pipeline.
 */
std::optional GenerateVertexShader(
    const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config) {
    /*std::string out = "#extension GL_ARB_separate_shader_objects : enable\n";
    out += ShaderDecompiler::GetCommonDeclarations();

    std::array used_regs{};
    const auto get_input_reg = [&used_regs](u32 reg) {
        ASSERT(reg < 16);
        used_regs[reg] = true;
        return fmt::format("vs_in_reg{}", reg);
    };

    const auto get_output_reg = [&](u32 reg) -> std::string {
        ASSERT(reg < 16);
        if (config.state.output_map[reg] < config.state.num_outputs) {
            return fmt::format("vs_out_attr{}", config.state.output_map[reg]);
        }
        return "";
    };

    auto program_source_opt = ShaderDecompiler::DecompileProgram(
        setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg,
        get_output_reg, config.state.sanitize_mul);

    if (!program_source_opt)
        return std::nullopt;

    std::string& program_source = program_source_opt->code;

    out += R"(
#define uniforms vs_uniforms
layout (std140) uniform vs_config {
    pica_uniforms uniforms;
};

)";
    // input attributes declaration
    for (std::size_t i = 0; i < used_regs.size(); ++i) {
        if (used_regs[i]) {
            out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i);
        }
    }
    out += '\n';

    // output attributes declaration
    for (u32 i = 0; i < config.state.num_outputs; ++i) {
        out += "layout(location = " + std::to_string(i) + ") out vec4 vs_out_attr" +
               std::to_string(i) + ";\n";
    }

    out += "\nvoid main() {\n";
    for (u32 i = 0; i < config.state.num_outputs; ++i) {
        out += fmt::format("    vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i);
    }
    out += "\n    exec_shader();\n}\n\n";

    out += program_source;

    return {{std::move(out)}};*/
    return std::nullopt;
}

// Emits the source shared by the fixed geometry shader; like GenerateVertexShader above,
// the OpenGL implementation is retained as commented-out reference code.
static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config) {
    /*std::string out = GetVertexInterfaceDeclaration(true, separable_shader);
    out += UniformBlockDef;
    out +=
ShaderDecompiler::GetCommonDeclarations(); + + out += '\n'; + for (u32 i = 0; i < config.vs_output_attributes; ++i) { + out += ("layout(location = " + std::to_string(i) + ") in vec4 vs_out_attr" + std::to_string(i) + "[];\n"; + } + + out += R"( +struct Vertex { +)"; + out += fmt::format(" vec4 attributes[{}];\n", config.gs_output_attributes); + out += "};\n\n"; + + const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string { + const u32 slot = static_cast(slot_semantic); + const u32 attrib = config.semantic_maps[slot].attribute_index; + const u32 comp = config.semantic_maps[slot].component_index; + if (attrib < config.gs_output_attributes) { + return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]); + } + return "0.0"; + }; + + out += "vec4 GetVertexQuaternion(Vertex vtx) {\n"; + out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + + semantic(VSOutputAttributes::QUATERNION_Z) + ", " + + semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; + out += "}\n\n"; + + out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n"; + out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + + semantic(VSOutputAttributes::POSITION_Y) + ", " + + semantic(VSOutputAttributes::POSITION_Z) + ", " + + semantic(VSOutputAttributes::POSITION_W) + ");\n"; + out += " gl_Position = vtx_pos;\n"; + out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; + out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 + out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; + out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; + + out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n"; + out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n"; + + out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + + 
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + + ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; + out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; + + out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; + out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; + + out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; + out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + + ");\n\n"; + + out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; + + out += " EmitVertex();\n"; + out += "}\n"; + + out += R"( +bool AreQuaternionsOpposite(vec4 qa, vec4 qb) { + return (dot(qa, qb) < 0.0); +} + +void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { + EmitVtx(vtx0, false); + EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1))); + EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2))); + EndPrimitive(); +} +)"; + + return out;*/ + return ""; +}; + +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) { + std::string out = "#version 450 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; + + out += R"( +layout(triangles) in; +layout(triangle_strip, max_vertices = 3) out; + +)"; + + out += GetGSCommonSource(config.state); + + out += R"( +void main() { + Vertex prim_buffer[3]; +)"; + for (u32 vtx = 0; vtx < 3; ++vtx) { + out += fmt::format(" prim_buffer[{}].attributes = vec4[{}](", vtx, + config.state.gs_output_attributes); + for (u32 i = 0; i < config.state.vs_output_attributes; ++i) { + out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? 
"" : ", ", i, vtx); + } + out += ");\n"; + } + out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n"; + out += "}\n"; + + return out; +} +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h new file mode 100644 index 000000000..7686edc25 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -0,0 +1,247 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include +#include "common/hash.h" +#include "video_core/regs.h" +#include "video_core/shader/shader.h" + +namespace Vulkan { + +enum Attributes { + ATTRIBUTE_POSITION, + ATTRIBUTE_COLOR, + ATTRIBUTE_TEXCOORD0, + ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, +}; + +// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() +struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { + Pica::TexturingRegs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } +}; + +struct PicaFSConfigState { + Pica::FramebufferRegs::CompareFunc alpha_test_func; + Pica::RasterizerRegs::ScissorMode scissor_test_mode; + Pica::TexturingRegs::TextureConfig::TextureType texture0_type; + bool texture2_use_coord1; + std::array tev_stages; + u8 combiner_buffer_input; + + Pica::RasterizerRegs::DepthBuffering depthmap_enable; + Pica::TexturingRegs::FogMode fog_mode; + bool fog_flip; + bool alphablend_enable; + Pica::FramebufferRegs::LogicOp logic_op; + + struct { + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + bool 
spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; + bool shadow_enable; + } light[8]; + + bool enable; + unsigned src_num; + Pica::LightingRegs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::LightingRegs::LightingConfig config; + bool enable_primary_alpha; + bool enable_secondary_alpha; + + bool enable_shadow; + bool shadow_primary; + bool shadow_secondary; + bool shadow_invert; + bool shadow_alpha; + unsigned shadow_selector; + + struct { + bool enable; + bool abs_input; + Pica::LightingRegs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + struct { + bool enable; + u32 coord; + Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; + Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; + bool separate_alpha; + bool noise_enable; + Pica::TexturingRegs::ProcTexShift u_shift, v_shift; + u32 lut_width; + u32 lut_offset0; + u32 lut_offset1; + u32 lut_offset2; + u32 lut_offset3; + u32 lod_min; + u32 lod_max; + Pica::TexturingRegs::ProcTexFilter lut_filter; + } proctex; + + bool shadow_rendering; + bool shadow_texture_orthographic; +}; + +/** + * This struct contains all state used to generate the GLSL fragment shader that emulates the + * current Pica register configuration. This struct is used as a cache key for generated GLSL shader + * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by + * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where + * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) + * two separate shaders sharing the same key. + */ +struct PicaFSConfig : Common::HashableStruct { + + /// Construct a PicaFSConfig with the given Pica register configuration. 
+ static PicaFSConfig BuildFromRegs(const Pica::Regs& regs); + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); + } +}; + +/** + * This struct contains common information to identify a GL vertex/geometry shader generated from + * PICA vertex/geometry shader. + */ +struct PicaShaderConfigCommon { + void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); + + u64 program_hash; + u64 swizzle_hash; + u32 main_offset; + bool sanitize_mul; + + u32 num_outputs; + + // output_map[output register index] -> output attribute index + std::array output_map; +}; + +/** + * This struct contains information to identify a GL vertex shader generated from PICA vertex + * shader. + */ +struct PicaVSConfig : Common::HashableStruct { + explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + state.Init(regs, setup); + } + explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { + state = conf; + } +}; + +struct PicaGSConfigCommonRaw { + void Init(const Pica::Regs& regs); + + u32 vs_output_attributes; + u32 gs_output_attributes; + + struct SemanticMap { + u32 attribute_index; + u32 component_index; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps; +}; + +/** + * This struct contains information to identify a GL geometry shader generated from PICA no-geometry + * shader pipeline + */ +struct PicaFixedGSConfig : Common::HashableStruct { + explicit PicaFixedGSConfig(const Pica::Regs& regs) { + state.Init(regs); + } +}; + +/** + * Generates the GLSL vertex shader program source code that accepts vertices from software shader + * and directly passes them to the fragment shader. 
+ * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ +std::string GenerateTrivialVertexShader(); + +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code; boost::none on failure + */ +std::optional GenerateVertexShader( + const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config); + +/** + * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline + * @returns String of the shader source code + */ +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the current Pica state + * @param config ShaderCacheKey object generated for the current Pica state, used for the shader + * configuration (NOTE: Use state in this struct only, not the Pica registers!) + * @param separable_shader generates shader that can be used for separate shader object + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const PicaFSConfig& config); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..d8f4aee34 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,194 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file 
included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +// Maps a buffer usage to the access/pipeline-stage pair used for the post-copy barrier in Flush. +inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) { + std::pair result{}; + switch (usage) { + case vk::BufferUsageFlagBits::eVertexBuffer: + result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead, + vk::PipelineStageFlagBits::eVertexInput); + break; + case vk::BufferUsageFlagBits::eIndexBuffer: + result = std::make_pair(vk::AccessFlagBits::eIndexRead, + vk::PipelineStageFlagBits::eVertexInput); + break; + case vk::BufferUsageFlagBits::eUniformBuffer: + result = std::make_pair(vk::AccessFlagBits::eUniformRead, + vk::PipelineStageFlagBits::eVertexShader | + vk::PipelineStageFlagBits::eGeometryShader | + vk::PipelineStageFlagBits::eFragmentShader); + break; + case vk::BufferUsageFlagBits::eUniformTexelBuffer: + result = std::make_pair(vk::AccessFlagBits::eShaderRead, + vk::PipelineStageFlagBits::eFragmentShader); + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage); + } + + return result; +} + +StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage) + : instance{instance} { + const vk::BufferCreateInfo buffer_info = { + .size = size, + .usage = usage + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST + }; + + VkBuffer unsafe_buffer = VK_NULL_HANDLE; + VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); + + vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, + &unsafe_buffer, &allocation,
&alloc_info); + + buffer = vk::Buffer{unsafe_buffer}; + mapped = std::span{reinterpret_cast(alloc_info.pMappedData), size}; +} + +StagingBuffer::~StagingBuffer() { + vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); +} + +StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, + u32 size, vk::BufferUsageFlagBits usage, std::span view_formats) + : instance{instance}, scheduler{scheduler}, staging{instance, size, vk::BufferUsageFlagBits::eTransferSrc}, + usage{usage}, total_size{size} { + + const vk::BufferCreateInfo buffer_info = { + .size = total_size, + .usage = usage | vk::BufferUsageFlagBits::eTransferDst + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE + }; + + VkBuffer unsafe_buffer = VK_NULL_HANDLE; + VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); + VmaAllocationInfo alloc_info; + VmaAllocator allocator = instance.GetAllocator(); + + vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, + &unsafe_buffer, &allocation, &alloc_info); + + buffer = vk::Buffer{unsafe_buffer}; + + // The views array holds exactly MAX_BUFFER_VIEWS entries, so that count is valid too. + ASSERT(view_formats.size() <= MAX_BUFFER_VIEWS); + + vk::Device device = instance.GetDevice(); + for (std::size_t i = 0; i < view_formats.size(); i++) { + const vk::BufferViewCreateInfo view_info = { + .buffer = buffer, + .format = view_formats[i], + .offset = 0, + .range = total_size + }; + + views[i] = device.createBufferView(view_info); + } + + view_count = view_formats.size(); + bucket_size = size / SCHEDULER_COMMAND_COUNT; +} + +StreamBuffer::~StreamBuffer() { + if (buffer) { + vk::Device device = instance.GetDevice(); + vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); + for (std::size_t i = 0; i < view_count; i++) { + device.destroyBufferView(views[i]); + } + } +} + +std::tuple StreamBuffer::Map(u32 size, u32 alignment) { + ASSERT(size <= total_size && alignment <= total_size); + + const u32 current_bucket =
scheduler.GetCurrentSlotIndex(); + auto& bucket = buckets[current_bucket]; + + // Honor the requested alignment: the caller validates it, so apply it to the + // bucket offset before carving out the mapped range (alignment == 0 means none). + if (alignment > 0) { + bucket.offset = Common::AlignUp(bucket.offset, alignment); + } + + if (bucket.offset + size > bucket_size) { + UNREACHABLE(); + } + + bool invalidate = false; + if (bucket.invalid) { + invalidate = true; + bucket.invalid = false; + } + + const u32 buffer_offset = current_bucket * bucket_size + bucket.offset; + u8* mapped = reinterpret_cast(staging.mapped.data() + buffer_offset); + return std::make_tuple(mapped, buffer_offset, invalidate); + +} + +void StreamBuffer::Commit(u32 size) { + buckets[scheduler.GetCurrentSlotIndex()].offset += size; +} + +void StreamBuffer::Flush() { + const u32 current_bucket = scheduler.GetCurrentSlotIndex(); + const u32 flush_size = buckets[current_bucket].offset; + ASSERT(flush_size <= bucket_size); + + if (flush_size > 0) { + vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer(); + VmaAllocator allocator = instance.GetAllocator(); + + const u32 flush_start = current_bucket * bucket_size; + const vk::BufferCopy copy_region = { + .srcOffset = flush_start, + .dstOffset = flush_start, + .size = flush_size + }; + + vmaFlushAllocation(allocator, allocation, flush_start, flush_size); + command_buffer.copyBuffer(staging.buffer, buffer, copy_region); + + // Add pipeline barrier for the flushed region + auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); + const vk::BufferMemoryBarrier buffer_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buffer, + .offset = flush_start, + .size = flush_size + }; + + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, + vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); + } + + // Reset the offset of the next bucket + const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT; + buckets[next_bucket].offset = 0; + buckets[next_bucket].invalid = true; +} + +u32
StreamBuffer::GetBufferOffset() const { + const u32 current_bucket = scheduler.GetCurrentSlotIndex(); + return current_bucket * bucket_size + buckets[current_bucket].offset; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..c7aefb1be --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,91 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include +#include +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class TaskScheduler; + +constexpr u32 MAX_BUFFER_VIEWS = 3; + +struct LockedRegion { + u32 size = 0; + u64 fence_counter = 0; +}; + +struct StagingBuffer { + StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage); + ~StagingBuffer(); + + const Instance& instance; + vk::Buffer buffer{}; + VmaAllocation allocation{}; + std::span mapped{}; +}; + +class StreamBuffer { +public: + StreamBuffer(const Instance& instance, TaskScheduler& scheduler, + u32 size, vk::BufferUsageFlagBits usage, std::span views); + ~StreamBuffer(); + + std::tuple Map(u32 size, u32 alignment = 0); + + /// Commits size bytes from the currently mapped staging memory + void Commit(u32 size = 0); + + /// Flushes staging memory to the GPU buffer + void Flush(); + + /// Returns the current buffer offset + u32 GetBufferOffset() const; + + /// Returns the Vulkan buffer handle + vk::Buffer GetHandle() const { + return buffer; + } + + /// Returns an immutable reference to the requested buffer view + const vk::BufferView& GetView(u32 index = 0) const { + ASSERT(index < view_count); + return views[index]; + } + +private: + /// Invalidates the buffer offsets + void Invalidate(); + + /// Removes the lock on regions whose fence counter has been reached 
by the GPU + bool UnlockFreeRegions(u32 target_size); + +private: + struct Bucket { + bool invalid; + u32 fence_counter; + u32 offset; + }; + + const Instance& instance; + TaskScheduler& scheduler; + StagingBuffer staging; + + vk::Buffer buffer{}; + VmaAllocation allocation{}; + vk::BufferUsageFlagBits usage; + u32 total_size = 0; + std::array views{}; + std::size_t view_count = 0; + + u32 bucket_size = 0; + std::array buckets{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp new file mode 100644 index 000000000..361b12628 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -0,0 +1,232 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" + +namespace Vulkan { + +Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache) + : instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} { + + // Set the surface format early for RenderpassCache to create the present renderpass + Configure(0, 0); + renderpass_cache.CreatePresentRenderpass(surface_format.format); +} + +Swapchain::~Swapchain() { + vk::Device device = instance.GetDevice(); + device.destroySwapchainKHR(swapchain); + + for (auto& image : swapchain_images) { + device.destroyImageView(image.image_view); + device.destroyFramebuffer(image.framebuffer); + } +} + +void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { + is_outdated = false; + is_suboptimal = false; + + // Fetch information about the provided surface + Configure(width, height); + + const std::array queue_family_indices = { + 
instance.GetGraphicsQueueFamilyIndex(), + instance.GetPresentQueueFamilyIndex(), + }; + + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; + const u32 queue_family_indices_count = exclusive ? 1u : 2u; + const vk::SharingMode sharing_mode = + exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent; + const vk::SwapchainCreateInfoKHR swapchain_info = { + .surface = surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, + .imageSharingMode = sharing_mode, + .queueFamilyIndexCount = queue_family_indices_count, + .pQueueFamilyIndices = queue_family_indices.data(), + .preTransform = transform, + .presentMode = present_mode, + .clipped = true, + .oldSwapchain = swapchain + }; + + vk::Device device = instance.GetDevice(); + vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info); + + // If an old swapchain exists, destroy it and move the new one to its place. 
+ if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) { + device.destroySwapchainKHR(old_swapchain); + } + + vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass(); + auto images = device.getSwapchainImagesKHR(swapchain); + + // Destroy the previous images + for (auto& image : swapchain_images) { + device.destroyImageView(image.image_view); + device.destroyFramebuffer(image.framebuffer); + } + + swapchain_images.clear(); + swapchain_images.resize(images.size()); + + std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image { + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = vk::ImageViewType::e2D, + .format = surface_format.format, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }; + + vk::ImageView image_view = device.createImageView(view_info); + const std::array attachments{image_view}; + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = present_renderpass, + .attachmentCount = 1, + .pAttachments = attachments.data(), + .width = extent.width, + .height = extent.height, + .layers = 1 + }; + + vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info); + + return Image{ + .image = image, + .image_view = image_view, + .framebuffer = framebuffer + }; + }); +} + +// Wait for maximum of 1 second +constexpr u64 ACQUIRE_TIMEOUT = 1000000000; + +void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) { + vk::Device device = instance.GetDevice(); + vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired, + VK_NULL_HANDLE, ¤t_image); + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + is_suboptimal = true; + break; + case vk::Result::eErrorOutOfDateKHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, 
"vkAcquireNextImageKHR returned unknown result"); + break; + } +} + +void Swapchain::Present(vk::Semaphore wait_for_present) { + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &wait_for_present, + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = ¤t_image + }; + + vk::Queue present_queue = instance.GetPresentQueue(); + vk::Result result = present_queue.presentKHR(present_info); + + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); + break; + case vk::Result::eErrorOutOfDateKHR: + is_outdated = true; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed"); + break; + } + + current_frame = (current_frame + 1) % swapchain_images.size(); +} + +void Swapchain::Configure(u32 width, u32 height) { + vk::PhysicalDevice physical = instance.GetPhysicalDevice(); + + // Choose surface format + auto formats = physical.getSurfaceFormatsKHR(surface); + surface_format = formats[0]; + + if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { + surface_format.format = vk::Format::eB8G8R8A8Unorm; + } else { + auto it = std::ranges::find_if(formats, [](vk::SurfaceFormatKHR format) -> bool { + return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear && + format.format == vk::Format::eB8G8R8A8Unorm; + }); + + if (it == formats.end()) { + LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); + } else { + surface_format = *it; + } + } + + // Checks if a particular mode is supported, if it is, returns that mode. 
+ auto modes = physical.getSurfacePresentModesKHR(surface); + + // FIFO is guaranteed by the Vulkan standard to be available + present_mode = vk::PresentModeKHR::eFifo; + auto iter = std::ranges::find_if(modes, [](vk::PresentModeKHR mode) { + return vk::PresentModeKHR::eMailbox == mode; + }); + + // Prefer Mailbox if present for lowest latency + if (iter != modes.end()) { + present_mode = vk::PresentModeKHR::eMailbox; + } + + // Query surface extent + auto capabilities = physical.getSurfaceCapabilitiesKHR(surface); + extent = capabilities.currentExtent; + + if (capabilities.currentExtent.width == std::numeric_limits::max()) { + extent.width = std::clamp(width, capabilities.minImageExtent.width, + capabilities.maxImageExtent.width); + extent.height = std::clamp(height, capabilities.minImageExtent.height, + capabilities.maxImageExtent.height); + } + + // Select number of images in swap chain, we prefer one buffer in the background to work on + image_count = capabilities.minImageCount + 1; + if (capabilities.maxImageCount > 0) { + image_count = std::min(image_count, capabilities.maxImageCount); + } + + // Prefer identity transform if possible + transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + if (!(capabilities.supportedTransforms & transform)) { + transform = capabilities.currentTransform; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h new file mode 100644 index 000000000..7116b49a8 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -0,0 +1,91 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class RenderpassCache; + +class Swapchain { +public: + Swapchain(const Instance& instance, RenderpassCache& renderpass_cache); + ~Swapchain(); + + /// Creates (or recreates) the swapchain with a given size. + void Create(u32 width, u32 height, bool vsync_enabled); + + /// Acquires the next image in the swapchain. + void AcquireNextImage(vk::Semaphore signal_acquired); + + /// Presents the current image and moves to the next one + void Present(vk::Semaphore wait_for_present); + + /// Returns the current swapchain extent + vk::Extent2D GetExtent() const { + return extent; + } + + /// Returns the swapchain surface + vk::SurfaceKHR GetSurface() const { + return surface; + } + + /// Returns the current framebuffer + vk::Framebuffer GetFramebuffer() const { + return swapchain_images[current_image].framebuffer; + } + + /// Returns the swapchain format + vk::SurfaceFormatKHR GetSurfaceFormat() const { + return surface_format; + } + + /// Returns the Vulkan swapchain handle + vk::SwapchainKHR GetHandle() const { + return swapchain; + } + + /// Returns true when the swapchain should be recreated + bool NeedsRecreation() const { + return is_suboptimal || is_outdated; + } + +private: + void Configure(u32 width, u32 height); + +private: + const Instance& instance; + RenderpassCache& renderpass_cache; + vk::SwapchainKHR swapchain{}; + vk::SurfaceKHR surface{}; + + // Swapchain properties + vk::SurfaceFormatKHR surface_format; + vk::PresentModeKHR present_mode; + vk::Extent2D extent; + vk::SurfaceTransformFlagBitsKHR transform; + u32 image_count; + + struct Image { + vk::Image image; + vk::ImageView image_view; + vk::Framebuffer framebuffer; + }; + + // Swapchain state + std::vector swapchain_images; + u32 current_image = 0; + u32 current_frame = 0; + bool vsync_enabled = false; + bool is_outdated = true; + bool is_suboptimal = true;
+}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp new file mode 100644 index 000000000..08b3f7b96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -0,0 +1,252 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} { + vk::Device device = instance.GetDevice(); + const vk::CommandPoolCreateInfo command_pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex() + }; + + command_pool = device.createCommandPool(command_pool_info); + + // If supported, prefer timeline semaphores over binary ones + if (instance.IsTimelineSemaphoreSupported()) { + const vk::StructureChain timeline_info = { + vk::SemaphoreCreateInfo{}, + vk::SemaphoreTypeCreateInfo{ + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0 + } + }; + + timeline = device.createSemaphore(timeline_info.get()); + } + + constexpr std::array pool_sizes = { + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, 512}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024} + }; + + const vk::DescriptorPoolCreateInfo descriptor_pool_info = { + .maxSets = 2048, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes 
= pool_sizes.data() + }; + + const vk::CommandBufferAllocateInfo buffer_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT + }; + + const auto command_buffers = device.allocateCommandBuffers(buffer_info); + for (std::size_t i = 0; i < commands.size(); i++) { + commands[i] = ExecutionSlot{ + .image_acquired = device.createSemaphore({}), + .present_ready = device.createSemaphore({}), + .fence = device.createFence({}), + .descriptor_pool = device.createDescriptorPool(descriptor_pool_info), + .render_command_buffer = command_buffers[2 * i], + .upload_command_buffer = command_buffers[2 * i + 1], + }; + } + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + // Begin first command + auto& command = commands[current_command]; + command.render_command_buffer.begin(begin_info); + command.fence_counter = next_fence_counter++; +} + +TaskScheduler::~TaskScheduler() { + vk::Device device = instance.GetDevice(); + device.waitIdle(); + + if (timeline) { + device.destroySemaphore(timeline); + } + + for (const auto& command : commands) { + device.destroyFence(command.fence); + device.destroySemaphore(command.image_acquired); + device.destroySemaphore(command.present_ready); + device.destroyDescriptorPool(command.descriptor_pool); + } + + device.destroyCommandPool(command_pool); +} + +void TaskScheduler::Synchronize(u32 slot) { + const auto& command = commands[slot]; + vk::Device device = instance.GetDevice(); + + // Fence counters are 64-bit (vkGetSemaphoreCounterValue returns uint64_t); + // storing them in a u32 would wrap and silently break synchronization. + u64 completed_counter = completed_fence_counter; + if (instance.IsTimelineSemaphoreSupported()) { + completed_counter = device.getSemaphoreCounterValue(timeline); + } + + if (command.fence_counter > completed_counter) { + if (instance.IsTimelineSemaphoreSupported()) { + const vk::SemaphoreWaitInfo wait_info = { + .semaphoreCount = 1, + .pSemaphores = &timeline, + .pValues = &command.fence_counter + }; + + if
(device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter); + UNREACHABLE(); + } + + } else if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter); + UNREACHABLE(); + } + } + + completed_fence_counter = command.fence_counter; + device.resetFences(command.fence); + device.resetDescriptorPool(command.descriptor_pool); +} + +void TaskScheduler::Submit(SubmitMode mode) { + const auto& command = commands[current_command]; + command.render_command_buffer.end(); + if (command.use_upload_buffer) { + command.upload_command_buffer.end(); + } + + u32 command_buffer_count = 0; + std::array command_buffers; + + if (command.use_upload_buffer) { + command_buffers[command_buffer_count++] = command.upload_command_buffer; + } + + command_buffers[command_buffer_count++] = command.render_command_buffer; + + const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced); + if (instance.IsTimelineSemaphoreSupported()) { + const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u; + const std::array wait_values{command.fence_counter - 1, 1ul}; + const std::array wait_semaphores{timeline, command.image_acquired}; + + const u32 signal_semaphore_count = swapchain_sync ? 
2u : 1u; + const std::array signal_values{command.fence_counter, 0ul}; + const std::array signal_semaphores{timeline, command.present_ready}; + + const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { + .waitSemaphoreValueCount = wait_semaphore_count, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = signal_semaphore_count, + .pSignalSemaphoreValues = signal_values.data() + }; + + const std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = signal_semaphores.data(), + }; + + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info); + + } else { + const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u; + const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u; + const vk::PipelineStageFlags wait_stage_masks = + vk::PipelineStageFlagBits::eColorAttachmentOutput; + + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = &command.image_acquired, + .pWaitDstStageMask = &wait_stage_masks, + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = &command.present_ready, + }; + + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info, command.fence); + } + + // Block host until the GPU catches up + if (True(mode & SubmitMode::Flush)) { + Synchronize(current_command); + } + + // Switch to next cmdbuffer. 
+ if (False(mode & SubmitMode::Shutdown)) { + SwitchSlot(); + } +} + +u64 TaskScheduler::GetFenceCounter() const { + vk::Device device = instance.GetDevice(); + if (instance.IsTimelineSemaphoreSupported()) { + return device.getSemaphoreCounterValue(timeline); + } + + return completed_fence_counter; +} + +vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() { + auto& command = commands[current_command]; + if (!command.use_upload_buffer) { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + command.upload_command_buffer.begin(begin_info); + command.use_upload_buffer = true; + } + + return command.upload_command_buffer; +} + +void TaskScheduler::SwitchSlot() { + current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT; + auto& command = commands[current_command]; + + // Wait for the GPU to finish with all resources for this command. + Synchronize(current_command); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + // Begin the next command buffer. + command.render_command_buffer.begin(begin_info); + command.fence_counter = next_fence_counter++; + command.use_upload_buffer = false; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h new file mode 100644 index 000000000..6cfb7cead --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -0,0 +1,97 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/common_funcs.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Buffer; +class Instance; + +enum class SubmitMode : u8 { + SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain + Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads + Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer +}; + +DECLARE_ENUM_FLAG_OPERATORS(SubmitMode); + +class TaskScheduler { +public: + TaskScheduler(const Instance& instance); + ~TaskScheduler(); + + /// Blocks the host until the current command completes execution + void Synchronize(u32 slot); + + /// Submits the current command to the graphics queue + void Submit(SubmitMode mode); + + /// Returns the last completed fence counter + u64 GetFenceCounter() const; + + /// Returns the command buffer used for early upload operations. + vk::CommandBuffer GetUploadCommandBuffer(); + + /// Returns the command buffer used for rendering + vk::CommandBuffer GetRenderCommandBuffer() const { + return commands[current_command].render_command_buffer; + } + + /// Returns the current descriptor pool + vk::DescriptorPool GetDescriptorPool() const { + return commands[current_command].descriptor_pool; + } + + /// Returns the index of the current command slot + u32 GetCurrentSlotIndex() const { + return current_command; + } + + u64 GetHostFenceCounter() const { + return next_fence_counter - 1; + } + + vk::Semaphore GetImageAcquiredSemaphore() const { + return commands[current_command].image_acquired; + } + + vk::Semaphore GetPresentReadySemaphore() const { + return commands[current_command].present_ready; + } + +private: + /// Activates the next command slot and optionally waits for its completion + void SwitchSlot(); + +private: + const Instance& instance; + u64 next_fence_counter = 1; + u64 completed_fence_counter = 0; + + 
struct ExecutionSlot { + bool use_upload_buffer = false; + u64 fence_counter = 0; + vk::Semaphore image_acquired; + vk::Semaphore present_ready; + vk::Fence fence; + vk::DescriptorPool descriptor_pool; + vk::CommandBuffer render_command_buffer; + vk::CommandBuffer upload_command_buffer; + }; + + vk::CommandPool command_pool{}; + vk::Semaphore timeline{}; + std::array commands{}; + u32 current_command = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp new file mode 100644 index 000000000..7556eac7e --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -0,0 +1,705 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Vulkan { + +vk::Format ToVkFormat(VideoCore::PixelFormat format) { + switch (format) { + case VideoCore::PixelFormat::RGBA8: + return vk::Format::eR8G8B8A8Unorm; + case VideoCore::PixelFormat::RGB8: + return vk::Format::eR8G8B8Unorm; + case VideoCore::PixelFormat::RGB5A1: + return vk::Format::eR5G5B5A1UnormPack16; + case VideoCore::PixelFormat::RGB565: + return vk::Format::eR5G6B5UnormPack16; + case VideoCore::PixelFormat::RGBA4: + return vk::Format::eR4G4B4A4UnormPack16; + case VideoCore::PixelFormat::D16: + return vk::Format::eD16Unorm; + case VideoCore::PixelFormat::D24: + return vk::Format::eX8D24UnormPack32; + case VideoCore::PixelFormat::D24S8: + return vk::Format::eD24UnormS8Uint; + case VideoCore::PixelFormat::Invalid: + LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format); + return 
vk::Format::eUndefined; + default: + // Use default case for the texture formats + return vk::Format::eR8G8B8A8Unorm; + } +} + +vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::ImageAspectFlagBits::eColor; + case VideoCore::SurfaceType::Depth: + return vk::ImageAspectFlagBits::eDepth; + case VideoCore::SurfaceType::DepthStencil: + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return vk::ImageAspectFlagBits::eColor; +} + +vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::FormatFeatureFlagBits::eColorAttachment; + case VideoCore::SurfaceType::Depth: + case VideoCore::SurfaceType::DepthStencil: + return vk::FormatFeatureFlagBits::eDepthStencilAttachment; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return vk::FormatFeatureFlagBits::eColorAttachment; +} + +constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024; + +TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler, + RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { + + for (auto& buffer : staging_buffers) { + buffer = std::make_unique(instance, STAGING_BUFFER_SIZE, + vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst); + } +} + +TextureRuntime::~TextureRuntime() { + VmaAllocator allocator = instance.GetAllocator(); + vk::Device device = instance.GetDevice(); + device.waitIdle(); + + for (const auto& [key, alloc] : texture_recycler) { + vmaDestroyImage(allocator, alloc.image, alloc.allocation); + device.destroyImageView(alloc.image_view); + } + + for (const 
auto& [key, framebuffer] : clear_framebuffers) { + device.destroyFramebuffer(framebuffer); + } + + texture_recycler.clear(); +} + +StagingData TextureRuntime::FindStaging(u32 size, bool upload) { + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + const u32 offset = staging_offsets[current_slot]; + if (offset + size > STAGING_BUFFER_SIZE) { + LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!"); + UNREACHABLE(); + } + + const auto& buffer = staging_buffers[current_slot]; + return StagingData{ + .buffer = buffer->buffer, + .size = size, + .mapped = buffer->mapped.subspan(offset, size), + .buffer_offset = offset + }; +} + +void TextureRuntime::OnSlotSwitch(u32 new_slot) { + staging_offsets[new_slot] = 0; +} + +ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + VideoCore::TextureType type) { + + const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; + const VideoCore::HostTextureTag key = { + .format = format, + .width = width, + .height = height, + .layers = layers + }; + + // Attempt to recycle an unused allocation + if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { + ImageAlloc alloc = std::move(it->second); + texture_recycler.erase(it); + return alloc; + } + + // Create a new allocation + vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format)); + vk::ImageAspectFlags aspect = GetImageAspect(vk_format); + + const u32 levels = std::bit_width(std::max(width, height)); + const vk::ImageCreateInfo image_info = { + .flags = type == VideoCore::TextureType::CubeMap ? 
+ vk::ImageCreateFlagBits::eCubeCompatible : + vk::ImageCreateFlags{}, + .imageType = vk::ImageType::e2D, + .format = vk_format, + .extent = {width, height, 1}, + .mipLevels = levels, + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = GetImageUsage(aspect), + }; + + const VmaAllocationCreateInfo alloc_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocation allocation; + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &allocation, nullptr); + if (result != VK_SUCCESS) { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + + vk::Image image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = type == VideoCore::TextureType::CubeMap ? + vk::ImageViewType::eCube : + vk::ImageViewType::e2D, + .format = vk_format, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = layers + } + }; + + vk::Device device = instance.GetDevice(); + vk::ImageView image_view = device.createImageView(view_info); + + return ImageAlloc{ + .image = image, + .image_view = image_view, + .allocation = allocation, + .aspect = aspect, + .levels = levels, + }; +} + +void TextureRuntime::Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc) { + texture_recycler.emplace(tag, std::move(alloc)); +} + +void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest) { + const VideoCore::SurfaceType type = VideoCore::GetFormatType(format); + const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type); + + if (format == VideoCore::PixelFormat::RGBA8) { + return Pica::Texture::ConvertABGRToRGBA(source, dest); + } else if (format == VideoCore::PixelFormat::RGB8 && 
upload) { + return Pica::Texture::ConvertBGRToRGBA(source, dest); + } else if (instance.IsFormatSupported(ToVkFormat(format), feature)) { + std::memcpy(dest.data(), source.data(), source.size()); + } else { + LOG_CRITICAL(Render_Vulkan, "Unimplemented conversion for format {}!", format); + std::memcpy(dest.data(), source.data(), source.size()); + } +} + +bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value) { + const vk::ImageAspectFlags aspect = ToVkAspect(surface.type); + renderpass_cache.ExitRenderpass(); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, + 0, surface.alloc.levels, 0, surface.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); + + vk::ClearValue clear_value{}; + if (aspect & vk::ImageAspectFlagBits::eColor) { + clear_value.color = vk::ClearColorValue{ + .float32 = std::to_array({value.color[0], value.color[1], value.color[2], value.color[3]}) + }; + } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) { + clear_value.depthStencil = vk::ClearDepthStencilValue{ + .depth = value.depth, + .stencil = value.stencil + }; + } + + // For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage + if (clear.texture_rect == surface.GetScaledRect()) { + const vk::ImageSubresourceRange range = { + .aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }; + + if (aspect & vk::ImageAspectFlagBits::eColor) { + command_buffer.clearColorImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, + clear_value.color, range); + } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) { + command_buffer.clearDepthStencilImage(surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, + clear_value.depthStencil, range);
+ } + } else { + // For partial clears we begin a clear renderpass with the appropriate render area + vk::RenderPass clear_renderpass{}; + ImageAlloc& alloc = surface.alloc; + if (aspect & vk::ImageAspectFlagBits::eColor) { + clear_renderpass = renderpass_cache.GetRenderpass(surface.pixel_format, + VideoCore::PixelFormat::Invalid, true); + Transition(command_buffer, alloc, vk::ImageLayout::eColorAttachmentOptimal, 0, alloc.levels); + } else if (aspect & vk::ImageAspectFlagBits::eDepth || aspect & vk::ImageAspectFlagBits::eStencil) { + clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, + surface.pixel_format, true); + Transition(command_buffer, alloc, vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, alloc.levels); + } + + auto [it, new_framebuffer] = clear_framebuffers.try_emplace(alloc.image_view, vk::Framebuffer{}); + if (new_framebuffer) { + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = clear_renderpass, + .attachmentCount = 1, + .pAttachments = &alloc.image_view, + .width = surface.GetScaledWidth(), + .height = surface.GetScaledHeight(), + .layers = 1 + }; + + vk::Device device = instance.GetDevice(); + it->second = device.createFramebuffer(framebuffer_info); + } + + const vk::RenderPassBeginInfo clear_begin_info = { + .renderPass = clear_renderpass, + .framebuffer = it->second, + .renderArea = vk::Rect2D{ + .offset = {static_cast(clear.texture_rect.left), static_cast(clear.texture_rect.bottom)}, + .extent = {clear.texture_rect.GetWidth(), clear.texture_rect.GetHeight()} + }, + .clearValueCount = 1, + .pClearValues = &clear_value + }; + + renderpass_cache.EnterRenderpass(clear_begin_info); + renderpass_cache.ExitRenderpass(); + } + + return true; +} + +bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) { + renderpass_cache.ExitRenderpass(); + + const vk::ImageCopy image_copy = { + .srcSubresource = { + .aspectMask = ToVkAspect(source.type), + .mipLevel = 
copy.src_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), 0}, + .dstSubresource = { + .aspectMask = ToVkAspect(dest.type), + .mipLevel = copy.dst_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), 0}, + .extent = {copy.extent.width, copy.extent.height, 1} + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, 0, source.alloc.levels); + Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, 0, dest.alloc.levels); + + command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + + return true; +} + +bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { + renderpass_cache.ExitRenderpass(); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, + 0, source.alloc.levels, 0, source.texture_type == VideoCore::TextureType::CubeMap ? 6 : 1); + Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, + 0, dest.alloc.levels, 0, dest.texture_type == VideoCore::TextureType::CubeMap ? 
6 : 1); + + const std::array source_offsets = { + vk::Offset3D{static_cast(blit.src_rect.left), static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), static_cast(blit.src_rect.top), 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{static_cast(blit.dst_rect.left), static_cast(blit.dst_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = ToVkAspect(source.type), + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = ToVkAspect(dest.type), + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eLinear); + + return true; +} + +void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { + renderpass_cache.ExitRenderpass(); + + // TODO: Investigate AMD single pass downsampler + s32 current_width = surface.GetScaledWidth(); + s32 current_height = surface.GetScaledHeight(); + + const u32 levels = std::bit_width(std::max(surface.width, surface.height)); + vk::ImageAspectFlags aspect = ToVkAspect(surface.type); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + for (u32 i = 1; i < levels; i++) { + Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); + Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1); + + const std::array source_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width, current_height, 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width > 1 ? current_width / 2 : 1, + current_height > 1 ? 
current_height / 2 : 1, 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = i - 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = i, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eLinear); + } +} + +void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, + vk::ImageLayout new_layout, u32 level, u32 level_count, + u32 layer, u32 layer_count) { + if (new_layout == alloc.layout || !alloc.image) { + return; + } + + struct LayoutInfo { + vk::AccessFlags access; + vk::PipelineStageFlags stage; + }; + + // Get optimal transition settings for every image layout. Settings taken from Dolphin + auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo { + LayoutInfo info; + switch (layout) { + case vk::ImageLayout::eUndefined: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + info.access = vk::AccessFlagBits::eNone; + info.stage = vk::PipelineStageFlagBits::eTopOfPipe; + break; + case vk::ImageLayout::ePreinitialized: + // Image has been pre-initialized by the host, so ensure all writes have completed. + info.access = vk::AccessFlagBits::eHostWrite; + info.stage = vk::PipelineStageFlagBits::eHost; + break; + case vk::ImageLayout::eColorAttachmentOptimal: + // Image was being used as a color attachment, so ensure all writes have completed. 
+ info.access = vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite; + info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput; + break; + case vk::ImageLayout::eDepthStencilAttachmentOptimal: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + break; + case vk::ImageLayout::ePresentSrcKHR: + info.access = vk::AccessFlagBits::eNone; + info.stage = vk::PipelineStageFlagBits::eBottomOfPipe; + break; + case vk::ImageLayout::eShaderReadOnlyOptimal: + // Image was being used as a shader resource, make sure all reads have finished. + info.access = vk::AccessFlagBits::eShaderRead; + info.stage = vk::PipelineStageFlagBits::eFragmentShader; + break; + case vk::ImageLayout::eTransferSrcOptimal: + // Image was being used as a copy source, ensure all reads have finished. + info.access = vk::AccessFlagBits::eTransferRead; + info.stage = vk::PipelineStageFlagBits::eTransfer; + break; + case vk::ImageLayout::eTransferDstOptimal: + // Image was being used as a copy destination, ensure all writes have finished. 
+ info.access = vk::AccessFlagBits::eTransferWrite; + info.stage = vk::PipelineStageFlagBits::eTransfer; + break; + case vk::ImageLayout::eGeneral: + info.access = vk::AccessFlagBits::eInputAttachmentRead; + info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput | + vk::PipelineStageFlagBits::eFragmentShader; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); + UNREACHABLE(); + } + + return info; + }; + + LayoutInfo source = GetLayoutInfo(alloc.layout); + LayoutInfo dest = GetLayoutInfo(new_layout); + + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = source.access, + .dstAccessMask = dest.access, + .oldLayout = alloc.layout, + .newLayout = new_layout, + .image = alloc.image, + .subresourceRange = { + .aspectMask = alloc.aspect, + .baseMipLevel = /*level*/0, + .levelCount = /*level_count*/alloc.levels, + .baseArrayLayer = layer, + .layerCount = layer_count + } + }; + + command_buffer.pipelineBarrier(source.stage, dest.stage, + vk::DependencyFlagBits::eByRegion, + {}, {}, barrier); + + alloc.layout = new_layout; +} + +Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, + scheduler{runtime.GetScheduler()} { + + if (pixel_format != VideoCore::PixelFormat::Invalid) { + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); + } +} + +Surface::~Surface() { + if (pixel_format != VideoCore::PixelFormat::Invalid) { + const VideoCore::HostTextureTag tag = { + .format = pixel_format, + .width = GetScaledWidth(), + .height = GetScaledHeight(), + .layers = texture_type == VideoCore::TextureType::CubeMap ? 
6u : 1u + }; + + runtime.Recycle(tag, std::move(alloc)); + } +} + +MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Upload); + + runtime.renderpass_cache.ExitRenderpass(); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledUpload(upload); + } else { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = upload.texture_rect; + const vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = { + .aspectMask = alloc.aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, 0, alloc.levels, + 0, texture_type == VideoCore::TextureType::CubeMap ? 
6 : 1); + command_buffer.copyBufferToImage(staging.buffer, alloc.image, + vk::ImageLayout::eTransferDstOptimal, + copy_region); + } + + InvalidateAllWatcher(); + + // Lock this data until the next scheduler switch + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + runtime.staging_offsets[current_slot] += staging.size; +} + +MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64)); +void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Download); + + runtime.renderpass_cache.ExitRenderpass(); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledDownload(download); + } else { + u32 region_count = 0; + std::array copy_regions; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = download.texture_rect; + vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = { + .aspectMask = alloc.aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { + copy_regions[region_count++] = copy_region; + } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) { + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + copy_regions[region_count++] = copy_region; + + if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { + return; // HACK: Skip depth + stencil downloads for now + copy_region.bufferOffset += staging.mapped.size(); + copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil; + copy_regions[region_count++] = copy_region; + } + } + + runtime.Transition(command_buffer, alloc, 
vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); + + // Copy pixel data to the staging buffer + command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, + staging.buffer, region_count, copy_regions.data()); + + scheduler.Submit(SubmitMode::Flush); + } + + // Lock this data until the next scheduler switch + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + runtime.staging_offsets[current_slot] += staging.size; +} + +void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { + /*const u32 rect_width = download.texture_rect.GetWidth(); + const u32 rect_height = download.texture_rect.GetHeight(); + + // Allocate an unscaled texture that fits the download rectangle to use as a blit destination + const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, + VideoCore::TextureType::Texture2D); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture); + + // Blit the scaled rectangle to the unscaled texture + const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; + glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top, + 0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + const auto& tuple = runtime.GetFormatTuple(pixel_format); + if (driver.IsOpenGLES()) { + const auto& downloader_es = runtime.GetDownloaderES(); + downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect_height, rect_width, + reinterpret_cast(download.buffer_offset)); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + reinterpret_cast(download.buffer_offset)); + }*/ +} + +void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { + /*const u32 rect_width = 
upload.texture_rect.GetWidth(); + const u32 rect_height = upload.texture_rect.GetHeight(); + + OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, + VideoCore::TextureType::Texture2D); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, + tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); + + const auto scaled_rect = upload.texture_rect * res_scale; + const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; + const auto& filterer = runtime.GetFilterer(); + if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) { + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture); + + // If filtering fails, resort to normal blitting + glBlitFramebuffer(0, 0, rect_width, rect_height, + upload.texture_rect.left, upload.texture_rect.bottom, + upload.texture_rect.right, upload.texture_rect.top, + MakeBufferMask(type), GL_LINEAR); + }*/ +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h new file mode 100644 index 000000000..93d6dde70 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -0,0 +1,143 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once +#include +#include +#include +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/rasterizer_cache/types.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" + +namespace Vulkan { + +struct StagingData { + vk::Buffer buffer; + u32 size = 0; + std::span mapped{}; + u32 buffer_offset = 0; +}; + +struct ImageAlloc { + vk::Image image; + vk::ImageView image_view; + VmaAllocation allocation; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; + u32 levels = 1; +}; + +class Instance; +class RenderpassCache; +class Surface; + +/** + * Provides texture manipulation functions to the rasterizer cache + * Separating this into a class makes it easier to abstract graphics API code + */ +class TextureRuntime { + friend class Surface; +public: + TextureRuntime(const Instance& instance, TaskScheduler& scheduler, + RenderpassCache& renderpass_cache); + ~TextureRuntime(); + + /// Maps an internal staging buffer of the provided size for pixel uploads/downloads + [[nodiscard]] StagingData FindStaging(u32 size, bool upload); + + /// Allocates a vulkan image possibly reusing an existing one + [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + VideoCore::TextureType type); + + /// Takes back ownership of the allocation for recycling + void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc); + + /// Performs required format conversions on the staging data + void FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest); + + /// Transitions the mip level range of the surface to new_layout + void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, + vk::ImageLayout new_layout, u32 level, u32 level_count, + u32 layer = 0, u32 layer_count = 1); + + /// Fills the
rectangle of the texture with the clear value provided + bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value); + + /// Copies a rectangle of src_tex to another rectangle of dst_rect + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + + /// Blits a rectangle of src_tex to another rectangle of dst_rect + bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); + + /// Generates mipmaps for all the available levels of the texture + void GenerateMipmaps(Surface& surface, u32 max_level); + + /// Performs operations that need to be done on every scheduler slot switch + void OnSlotSwitch(u32 new_slot); + +private: + /// Returns the current Vulkan instance + const Instance& GetInstance() const { + return instance; + } + + /// Returns the current Vulkan scheduler + TaskScheduler& GetScheduler() const { + return scheduler; + } + +private: + const Instance& instance; + TaskScheduler& scheduler; + RenderpassCache& renderpass_cache; + std::array, SCHEDULER_COMMAND_COUNT> staging_buffers; + std::array staging_offsets{}; + std::unordered_multimap texture_recycler; + std::unordered_map clear_framebuffers; +}; + +class Surface : public VideoCore::SurfaceBase { + friend class TextureRuntime; + friend class RasterizerVulkan; +public: + Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); + ~Surface() override; + + /// Uploads pixel data in staging to a rectangle region of the surface texture + void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); + + /// Downloads pixel data to staging from a rectangle region of the surface texture + void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); + +private: + /// Downloads scaled image by downscaling the requested rectangle + void ScaledDownload(const VideoCore::BufferTextureCopy& download); + + /// Uploads pixel data to scaled texture + void
ScaledUpload(const VideoCore::BufferTextureCopy& upload); + + /// Overrides the image layout of the mip level range + void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); + +private: + TextureRuntime& runtime; + const Instance& instance; + TaskScheduler& scheduler; + + ImageAlloc alloc{}; + vk::Format internal_format = vk::Format::eUndefined; +}; + +struct Traits { + using RuntimeType = TextureRuntime; + using SurfaceType = Surface; +}; + +using RasterizerCache = VideoCore::RasterizerCache; + +} // namespace Vulkan diff --git a/src/video_core/shader/shader_cache.h b/src/video_core/shader/shader_cache.h new file mode 100644 index 000000000..4cf03da3c --- /dev/null +++ b/src/video_core/shader/shader_cache.h @@ -0,0 +1,97 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "video_core/shader/shader.h" + +namespace Pica::Shader { + +template +using ShaderCacheResult = std::pair>; + +template +class ShaderCache { +public: + ShaderCache() {} + ~ShaderCache() = default; + + /// Returns a shader handle generated from the provided config + template + auto Get(const KeyType& config, Args&&... args) -> ShaderCacheResult { + auto [iter, new_shader] = shaders.emplace(config, ShaderType{}); + auto& shader = iter->second; + + if (new_shader) { + std::string code = CodeGenerator(config); + shader = ModuleCompiler(code, args...); + return std::make_pair(shader, code); + } + + return std::make_pair(shader, std::nullopt); + } + + void Inject(const KeyType& key, ShaderType&& shader) { + shaders.emplace(key, std::move(shader)); + } + +public: + std::unordered_map shaders; +}; + +/** + * This is a cache designed for shaders translated from PICA shaders. The first cache matches the + * config structure like a normal cache does. On cache miss, the second cache matches the generated + * GLSL code. 
The configuration is like this because there might be leftover code in the PICA shader + * program buffer from the previous shader, which is hashed into the config, resulting several + * different config values from the same shader program. + */ +template (*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyType&)> +class ShaderDoubleCache { +public: + ShaderDoubleCache() = default; + ~ShaderDoubleCache() = default; + + template + auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) -> ShaderCacheResult { + if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) { + auto code = CodeGenerator(setup, key); + if (!code) { + shader_map[key] = nullptr; + return std::make_pair(ShaderType{}, std::nullopt); + } + + std::string& program = code.value(); + auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{}); + auto& shader = iter->second; + + if (new_shader) { + shader = ModuleCompiler(program, args...); + } + + shader_map[key] = &shader; + return std::make_pair(shader, std::move(program)); + } else { + return std::make_pair(*map_iter->second, std::nullopt); + } + } + + void Inject(const KeyType& key, std::string decomp, ShaderType&& program) { + const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first; + + auto& cached_shader = iter->second; + shader_map.insert_or_assign(key, &cached_shader); + } + +public: + std::unordered_map shader_map; + std::unordered_map shader_cache; +}; + +} // namespace Pica::Shader diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp new file mode 100644 index 000000000..ec8336ca4 --- /dev/null +++ b/src/video_core/shader/shader_uniforms.cpp @@ -0,0 +1,25 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_uniforms.h" + +namespace Pica::Shader { + +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup) { + std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), + [](bool value) -> BoolAligned { return {value ? 1 : 0}; }); + std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), + [](const auto& value) -> Common::Vec4u { + return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; + }); + std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), + [](const auto& value) -> Common::Vec4f { + return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), + value.w.ToFloat32()}; + }); +} + +} // namespace Pica::Shader diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/shader_uniforms.h new file mode 100644 index 000000000..b1fcac362 --- /dev/null +++ b/src/video_core/shader/shader_uniforms.h @@ -0,0 +1,99 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/vector_math.h" +#include "video_core/regs_lighting.h" + +namespace Pica { +struct ShaderRegs; +} + +namespace Pica::Shader { + +class ShaderSetup; + +enum class UniformBindings : u32 { Common, VS, GS }; + +struct LightSrc { + alignas(16) Common::Vec3f specular_0; + alignas(16) Common::Vec3f specular_1; + alignas(16) Common::Vec3f diffuse; + alignas(16) Common::Vec3f ambient; + alignas(16) Common::Vec3f position; + alignas(16) Common::Vec3f spot_direction; // negated + float dist_atten_bias; + float dist_atten_scale; +}; + +/** + * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + * NOTE: Always keep a vec4 at the end. 
The GL spec is not clear wether the alignment at + * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + * Not following that rule will cause problems on some AMD drivers. + */ +struct UniformData { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + bool enable_clip1; + alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4]; + alignas(16) Common::Vec3f fog_color; + alignas(8) Common::Vec2f proctex_noise_f; + alignas(8) Common::Vec2f proctex_noise_a; + alignas(8) Common::Vec2f proctex_noise_p; + alignas(16) Common::Vec3f lighting_global_ambient; + LightSrc light_src[8]; + alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) Common::Vec4f tev_combiner_buffer_color; + alignas(16) Common::Vec4f clip_coef; +}; + +static_assert(sizeof(UniformData) == 0x4F0, + "The size of the UniformData does not match the structure in the shader"); +static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + +/** + * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. + * NOTE: the same rule from UniformData also applies here. 
+ */ +struct PicaUniformsData { + void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup); + + struct BoolAligned { + alignas(16) int b; + }; + + std::array bools; + alignas(16) std::array i; + alignas(16) std::array f; +}; + +struct VSUniformData { + PicaUniformsData uniforms; +}; +static_assert(sizeof(VSUniformData) == 1856, + "The size of the VSUniformData does not match the structure in the shader"); +static_assert(sizeof(VSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + + +} // namespace Pica::Shader diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index 9ed7f6337..b3efedd77 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -227,14 +227,14 @@ void ConvertBGRToRGB(std::span source, std::span des for (std::size_t i = 0; i < source.size(); i += 3) { u32 bgr{}; std::memcpy(&bgr, source.data() + i, 3); - const u32 rgb = std::byteswap(bgr << 8); + const u32 rgb = Common::swap32(bgr << 8); std::memcpy(dest.data(), &rgb, 3); } } void ConvertBGRToRGBA(std::span source, std::span dest) { u32 j = 0; - for (u32 i = 0; i < source.size(); i += 3) { + for (std::size_t i = 0; i < source.size(); i += 3) { dest[j] = source[i + 2]; dest[j + 1] = source[i + 1]; dest[j + 2] = source[i]; @@ -246,7 +246,7 @@ void ConvertBGRToRGBA(std::span source, std::span de void ConvertABGRToRGBA(std::span source, std::span dest) { for (u32 i = 0; i < source.size(); i += 4) { const u32 abgr = *reinterpret_cast(source.data() + i); - const u32 rgba = std::byteswap(abgr); + const u32 rgba = Common::swap32(abgr); std::memcpy(dest.data() + i, &rgba, 4); } } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a08081c29..8ad97e122 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -11,6 +11,7 @@ #include "video_core/renderer_base.h" #include 
"video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/video_core.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -44,15 +45,26 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondar g_memory = &memory; Pica::Init(); - OpenGL::GLES = Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::OpenGLES; + const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue(); + switch (graphics_api) { + case Settings::GraphicsAPI::OpenGL: + case Settings::GraphicsAPI::OpenGLES: + OpenGL::GLES = graphics_api == Settings::GraphicsAPI::OpenGLES; + g_renderer = std::make_unique(emu_window, secondary_window); + break; + case Settings::GraphicsAPI::Vulkan: + g_renderer = std::make_unique(emu_window); + break; + default: + LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api); + UNREACHABLE(); + } - g_renderer = std::make_unique(emu_window, secondary_window); ResultStatus result = g_renderer->Init(); - if (result != ResultStatus::Success) { - LOG_ERROR(Render, "initialization failed !"); + LOG_ERROR(Render, "Video core initialization failed"); } else { - LOG_DEBUG(Render, "initialized OK"); + LOG_INFO(Render, "Video core initialization OK"); } return result; diff --git a/src/web_service/verify_user_jwt.cpp b/src/web_service/verify_user_jwt.cpp index 27e08db9e..47c648f72 100644 --- a/src/web_service/verify_user_jwt.cpp +++ b/src/web_service/verify_user_jwt.cpp @@ -3,8 +3,8 @@ // Refer to the license.txt file included. #include -#include #include "common/logging/log.h" +#include #include "common/web_result.h" #include "web_service/verify_user_jwt.h" #include "web_service/web_backend.h"