From ab3a228e5ed2e9ca3fb4e5b2defa65fd93b4c38e Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Sun, 18 Sep 2022 01:11:37 +0300 Subject: [PATCH] renderer_vulkan: Implement renderer and rasterizer classes * Also WIP. Vulkan crashes when allocating command buffers, need to investigate... --- src/common/logging/backend.cpp | 1 + src/video_core/CMakeLists.txt | 8 +- .../rasterizer_cache/rasterizer_cache.h | 39 +- src/video_core/rasterizer_cache/utils.h | 1 - src/video_core/rasterizer_interface.h | 11 - src/video_core/regs_framebuffer.h | 1 + src/video_core/renderer_base.cpp | 4 - src/video_core/renderer_base.h | 12 +- .../renderer_opengl/gl_rasterizer.h | 5 +- .../renderer_opengl/gl_shader_manager.h | 16 +- .../renderer_opengl/gl_texture_runtime.cpp | 35 +- .../renderer_opengl/gl_texture_runtime.h | 6 +- .../renderer_opengl/renderer_opengl.cpp | 11 +- .../renderer_opengl/renderer_opengl.h | 17 +- .../renderer_vulkan/renderer_vulkan.cpp | 966 ++++++++ .../renderer_vulkan/renderer_vulkan.h | 128 + .../renderer_vulkan/vk_pipeline_cache.cpp | 64 +- .../renderer_vulkan/vk_pipeline_cache.h | 33 +- .../renderer_vulkan/vk_rasterizer.cpp | 2128 +++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 314 +++ .../renderer_vulkan/vk_renderpass_cache.cpp | 24 +- .../renderer_vulkan/vk_renderpass_cache.h | 19 +- .../renderer_vulkan/vk_shader_gen.cpp | 9 +- .../renderer_vulkan/vk_stream_buffer.cpp | 38 +- .../renderer_vulkan/vk_stream_buffer.h | 18 +- .../renderer_vulkan/vk_swapchain.cpp | 5 +- src/video_core/renderer_vulkan/vk_swapchain.h | 11 +- .../renderer_vulkan/vk_task_scheduler.cpp | 1 + .../renderer_vulkan/vk_texture_runtime.cpp | 424 ++-- .../renderer_vulkan/vk_texture_runtime.h | 40 +- src/video_core/shader/shader_cache.h | 4 +- src/video_core/shader/shader_uniforms.cpp | 25 + src/video_core/shader/shader_uniforms.h | 98 + src/video_core/video_core.cpp | 22 +- 34 files changed, 4191 insertions(+), 347 deletions(-) create mode 100644 src/video_core/renderer_vulkan/renderer_vulkan.cpp create mode 100644 src/video_core/renderer_vulkan/renderer_vulkan.h create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.h create mode 100644 src/video_core/shader/shader_uniforms.cpp create mode 100644 src/video_core/shader/shader_uniforms.h diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index fdc702521..56df7d9e2 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -235,6 +235,7 @@ void DebuggerBackend::Write(const Entry& entry) { CLS(Render) \ SUB(Render, Software) \ SUB(Render, OpenGL) \ + SUB(Render, Vulkan) \ CLS(Audio) \ SUB(Audio, DSP) \ SUB(Audio, Sink) \ diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8ec0c8942..c0ecc33a2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,8 +83,12 @@ add_library(video_core STATIC renderer_opengl/gl_format_reinterpreter.cpp renderer_opengl/gl_format_reinterpreter.h renderer_vulkan/pica_to_vk.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/renderer_vulkan.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h renderer_vulkan/vk_pipeline_cache.cpp @@ -110,6 +114,8 @@ add_library(video_core STATIC shader/shader_cache.h shader/shader_interpreter.cpp shader/shader_interpreter.h + shader/shader_uniforms.cpp + shader/shader_uniforms.h 
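// Aside on the logging change above: registering SUB(Render, Vulkan) is what lets the
// Vulkan sources added later in this patch log through the common macros. Minimal usage
// sketch (the message text here is hypothetical; LOG_TRACE/LOG_CRITICAL calls against
// Render_Vulkan appear in renderer_vulkan.cpp below):
LOG_INFO(Render_Vulkan, "Vulkan renderer initialized");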
swrasterizer/clipper.cpp swrasterizer/clipper.h swrasterizer/framebuffer.cpp @@ -183,7 +189,7 @@ create_target_directory_groups(video_core) target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include) target_include_directories(video_core PRIVATE ../../externals/vma) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad glslang nihstro-headers Boost::serialization) +target_link_libraries(video_core PRIVATE glad glslang SPIRV nihstro-headers Boost::serialization) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if (ARCHITECTURE_x86_64) diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 0144686e3..62fb394f8 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/alignment.h" #include "common/logging/log.h" @@ -901,24 +902,15 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); const u32 start_offset = load_start - surface->addr; - const u32 upload_size = static_cast(upload_data.size()); MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); - /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); - } else { - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); - }*/ - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + if (surface->is_tiled) { + std::vector unswizzled_data(staging.size); + UnswizzleTexture(*surface, start_offset, upload_data, unswizzled_data); + runtime.FormatConvert(surface->pixel_format, true, unswizzled_data, staging.mapped); } else { - UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); + runtime.FormatConvert(surface->pixel_format, true, upload_data, staging.mapped); } const BufferTextureCopy upload = { @@ -957,24 +949,15 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); const u32 start_offset = flush_start - surface->addr; - const u32 download_size = static_cast(download_dest.size()); MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto download_data = staging.mapped.subspan(start_offset, download_size); - /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(download_data, download_dest); - } else { - std::memcpy(download_dest.data(), download_data.data(), download_size); - }*/ - std::memcpy(download_dest.data(), download_data.data(), download_size); + if (surface->is_tiled) { + std::vector swizzled_data(staging.size); + SwizzleTexture(*surface, start_offset, staging.mapped, swizzled_data); + runtime.FormatConvert(surface->pixel_format, false, swizzled_data, download_dest); } else { - 
SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); + runtime.FormatConvert(surface->pixel_format, false, staging.mapped, download_dest); } } diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index e9528e9e7..e26880a04 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -14,7 +14,6 @@ struct HostTextureTag { PixelFormat format{}; u32 width = 0; u32 height = 0; - u32 levels = 1; u32 layers = 1; auto operator<=>(const HostTextureTag&) const noexcept = default; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a17024a50..0d9a825eb 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,10 +9,6 @@ #include "common/common_types.h" #include "core/hw/gpu.h" -namespace OpenGL { -struct ScreenInfo; -} - namespace Pica::Shader { struct OutputVertex; } // namespace Pica::Shader @@ -73,13 +69,6 @@ public: return false; } - /// Attempt to use a faster method to display the framebuffer to screen - virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, - PAddr framebuffer_addr, u32 pixel_stride, - OpenGL::ScreenInfo& screen_info) { - return false; - } - /// Attempt to draw using hardware shaders virtual bool AccelerateDrawBatch(bool is_indexed) { return false; diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index c43274540..483c462fc 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h @@ -159,6 +159,7 @@ struct FramebufferRegs { } stencil_test; union { + u32 depth_color_mask; BitField<0, 1, u32> depth_test_enable; BitField<4, 3, CompareFunc> depth_test_func; BitField<8, 1, u32> red_enable; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index a3ed0bd14..ce1474b35 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -12,7 +12,3 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) { const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height, is_portrait_mode); } - -void RendererBase::Sync() { - rasterizer->SyncEntireState(); -} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index d0d21a2cc..88a6f2358 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -21,6 +21,9 @@ public: /// Initialize the renderer virtual VideoCore::ResultStatus Init() = 0; + /// Returns the rasterizer owned by the renderer + virtual VideoCore::RasterizerInterface* Rasterizer() = 0; + /// Shutdown the renderer virtual void ShutDown() = 0; @@ -37,6 +40,8 @@ public: /// Cleans up after video dumping is ended virtual void CleanupVideoDumping() = 0; + virtual void Sync() = 0; + /// Updates the framebuffer layout of the contained render window handle. void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {}); @@ -51,10 +56,6 @@ public: return m_current_frame; } - VideoCore::RasterizerInterface* Rasterizer() const { - return rasterizer.get(); - } - Frontend::EmuWindow& GetRenderWindow() { return render_window; } @@ -63,11 +64,8 @@ public: return render_window; } - void Sync(); - protected: Frontend::EmuWindow& render_window; ///< Reference to the render window handle. 
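// The UploadSurface/DownloadSurface hunks above now route all staging data through
// runtime.FormatConvert, so each backend can apply its own pixel conversions instead of
// the old inline GLES-only paths. A sketch of the kind of conversion this hook enables,
// assuming a backend that must widen 24-bit RGB8 texels to RGBA8 (hypothetical helper,
// byte-span parameter types assumed; the real per-backend logic lives in the respective
// *_texture_runtime.cpp files):

#include <cstddef>
#include <span>

// Widen tightly packed RGB8 texels to RGBA8 with opaque alpha.
// dest is expected to hold 4/3 of source.size() bytes.
static void ConvertRGB8ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
    std::size_t out = 0;
    for (std::size_t in = 0; in + 3 <= source.size(); in += 3) {
        dest[out++] = source[in + 0];
        dest[out++] = source[in + 1];
        dest[out++] = source[in + 2];
        dest[out++] = std::byte{0xFF}; // opaque alpha
    }
}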
- std::unique_ptr rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index eaf1751a3..f563c429c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -20,6 +20,9 @@ class EmuWindow; } namespace OpenGL { + +struct ScreenInfo; + class Driver; class ShaderProgramManager; @@ -43,7 +46,7 @@ public: bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) override; + u32 pixel_stride, ScreenInfo& screen_info); bool AccelerateDrawBatch(bool is_indexed) override; /// Syncs entire status to match PICA registers diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 66883e7e6..c0a2cf94a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -41,10 +41,12 @@ struct LightSrc { float dist_atten_scale; }; -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. +/** + * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + * NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at + * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + * Not following that rule will cause problems on some AMD drivers. + */ struct UniformData { int framebuffer_scale; int alphatest_ref; @@ -81,8 +83,10 @@ static_assert(sizeof(UniformData) == 0x4F0, static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); -/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. -// NOTE: the same rule from UniformData also applies here. +/** + * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. + * NOTE: the same rule from UniformData also applies here. 
+ */ struct PicaUniformsData { void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index c73f0f8d5..6230f7037 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -124,6 +124,17 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f return DEFAULT_TUPLE; } +void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest) { + if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) { + Pica::Texture::ConvertABGRToRGBA(source, dest); + } else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) { + Pica::Texture::ConvertBGRToRGB(source, dest); + } else { + std::memcpy(dest.data(), source.data(), source.size()); + } +} + OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { @@ -302,6 +313,17 @@ Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); } +Surface::~Surface() { + const VideoCore::HostTextureTag tag = { + .format = pixel_format, + .width = GetScaledWidth(), + .height = GetScaledHeight(), + .layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u + }; + + runtime.texture_recycler.emplace(tag, std::move(texture)); +} + MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64)); void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) { MICROPROFILE_SCOPE(OpenGL_Upload); @@ -327,8 +349,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBu upload.texture_rect.left, upload.texture_rect.bottom, upload.texture_rect.GetWidth(), upload.texture_rect.GetHeight(), - tuple.format, tuple.type, - reinterpret_cast(upload.buffer_offset)); + tuple.format, tuple.type, 0); } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -361,7 +382,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi const auto& tuple = runtime.GetFormatTuple(pixel_format); glReadPixels(download.texture_rect.left, download.texture_rect.bottom, download.texture_rect.GetWidth(), download.texture_rect.GetHeight(), - tuple.format, tuple.type, reinterpret_cast(download.buffer_offset)); + tuple.format, tuple.type, 0); } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -390,11 +411,9 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { if (driver.IsOpenGLES()) { const auto& downloader_es = runtime.GetDownloaderES(); downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect_height, rect_width, - reinterpret_cast(download.buffer_offset)); + rect_height, rect_width, 0); } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - reinterpret_cast(download.buffer_offset)); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0); } } @@ -409,7 +428,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { const auto& tuple = runtime.GetFormatTuple(pixel_format); glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, - tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); + tuple.format, tuple.type, 0); const auto scaled_rect = upload.texture_rect * res_scale; const auto 
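// Surface::~Surface above recycles the backing OGLTexture into runtime.texture_recycler,
// keyed by HostTextureTag. A sketch of the presumed reuse path on the allocation side;
// the container type and lookup are assumptions, and the full Allocate body is not shown
// in this hunk:

#include <optional>
#include <utility>

// recycler is assumed to be an associative container mapping HostTextureTag -> OGLTexture.
static std::optional<OGLTexture> TryRecycleTexture(auto& recycler,
                                                   const VideoCore::HostTextureTag& key) {
    if (auto it = recycler.find(key); it != recycler.end()) {
        OGLTexture texture = std::move(it->second); // reuse instead of re-creating storage
        recycler.erase(it);
        return texture;
    }
    return std::nullopt; // caller falls back to allocating a fresh texture
}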
unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 493d49b7b..d18450cb6 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -70,6 +70,10 @@ public: /// Returns the OpenGL format tuple associated with the provided pixel format const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + /// Performs required format convertions on the staging data + void FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest); + /// Allocates an OpenGL texture with the specified dimentions and format OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type); @@ -124,7 +128,7 @@ private: class Surface : public VideoCore::SurfaceBase { public: Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); - ~Surface() override = default; + ~Surface() override; /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bd9e6abdc..d1f671df9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -15,7 +15,6 @@ #include "core/settings.h" #include "core/tracer/recorder.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state.h" @@ -381,6 +380,10 @@ VideoCore::ResultStatus RendererOpenGL::Init() { return VideoCore::ResultStatus::Success; } +VideoCore::RasterizerInterface* RendererOpenGL::Rasterizer() { + return rasterizer.get(); +} + /// Shutdown the renderer void RendererOpenGL::ShutDown() {} @@ -570,7 +573,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. 
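// RendererOpenGL::Rasterizer() above implements the new virtual accessor from RendererBase;
// with the rasterizer now owned by each concrete renderer, code outside the renderer is
// expected to reach it through that accessor. A sketch of the presumed call-site pattern
// (hypothetical function; it assumes the VideoCore::g_renderer global and the existing
// RasterizerInterface::FlushRegion entry point, and the actual video_core.cpp call sites
// are not shown in this section):
static void NotifyFlush(PAddr addr, u32 size) {
    if (VideoCore::g_renderer) {
        VideoCore::g_renderer->Rasterizer()->FlushRegion(addr, size);
    }
}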
ASSERT(pixel_stride % 4 == 0); - if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, + if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; @@ -1190,4 +1193,8 @@ void RendererOpenGL::CleanupVideoDumping() { mailbox->free_cv.notify_one(); } +void RendererOpenGL::Sync() { + rasterizer->SyncEntireState(); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index ab01f71f4..ba6979b8f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,29 +55,21 @@ struct PresentationTexture { OGLTexture texture; }; +class RasterizerOpenGL; + class RendererOpenGL : public RendererBase { public: explicit RendererOpenGL(Frontend::EmuWindow& window); ~RendererOpenGL() override; - /// Initialize the renderer VideoCore::ResultStatus Init() override; - - /// Shutdown the renderer + VideoCore::RasterizerInterface* Rasterizer() override; void ShutDown() override; - - /// Finalizes rendering the guest frame void SwapBuffers() override; - - /// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this - /// context void TryPresent(int timeout_ms) override; - - /// Prepares for video dumping (e.g. create necessary buffers, etc) void PrepareVideoDumping() override; - - /// Cleans up after video dumping is ended void CleanupVideoDumping() override; + void Sync() override; private: void InitOpenGLObjects(); @@ -108,6 +100,7 @@ private: private: Driver driver; OpenGLState state; + std::unique_ptr rasterizer; // OpenGL object IDs OGLVertexArray vertex_array; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 000000000..3d833f58a --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,966 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/frontend/emu_window.h" +#include "core/frontend/framebuffer_layout.h" +#include "core/hw/gpu.h" +#include "core/hw/hw.h" +#include "core/hw/lcd.h" +#include "core/settings.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/video_core.h" + +namespace Vulkan { + +constexpr std::string_view vertex_shader = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; + +// This is a truncated 3x3 matrix for 2D transformations: +// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. +// The third column performs translation. +// The third row could be used for projection, which we don't need in 2D. 
It hence is assumed to +// implicitly be [0, 0, 1] +layout (push_constant) uniform DrawInfo { + mat3x2 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; +}; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); + gl_Position.y = -gl_Position.y; + frag_tex_coord = vert_tex_coord; +} +)"; + +constexpr std::string_view fragment_shader = R"( +version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (push_constant) uniform DrawInfo { + mat3x2 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; +}; + +layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; + +void main() { + color = texture(screen_textures[screen_id_l], frag_tex_coord); +} +)"; + +constexpr std::string_view fragment_shader_anaglyph = R"( +version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +// Anaglyph Red-Cyan shader based on Dubois algorithm +// Constants taken from the paper: +// "Conversion of a Stereo Pair to Anaglyph with +// the Least-Squares Projection Method" +// Eric Dubois, March 2009 +const mat3 l = mat3( 0.437, 0.449, 0.164, + -0.062,-0.062,-0.024, + -0.048,-0.050,-0.017); +const mat3 r = mat3(-0.011,-0.032,-0.007, + 0.377, 0.761, 0.009, + -0.026,-0.093, 1.234); + +layout (push_constant) uniform DrawInfo { + mat3x2 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; +}; + +layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; + +void main() { + vec4 color_tex_l = texture(screen_textures[screen_id_l], frag_tex_coord); + vec4 color_tex_r = texture(screen_textures[screen_id_r], frag_tex_coord); + color = vec4(color_tex_l.rgb*l+color_tex_r.rgb*r, color_tex_l.a); +} +)"; + +constexpr std::string_view fragment_shader_interlaced = R"( +version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (push_constant) uniform DrawInfo { + mat3x2 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; + int reverse_interlaced; +}; + +layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; + +void main() { + float screen_row = o_resolution.x * frag_tex_coord.x; + if (int(screen_row) % 2 == reverse_interlaced) + color = texture(screen_textures[screen_id_l], frag_tex_coord); + else + color = texture(screen_textures[screen_id_r], frag_tex_coord); +} +)"; + + +/// Vertex structure that the drawn screen rectangles are composed of. +struct ScreenRectVertex { + ScreenRectVertex() = default; + ScreenRectVertex(float x, float y, float u, float v) : + position{Common::MakeVec(x, y)}, tex_coord{Common::MakeVec(u, v)} {} + + Common::Vec2f position; + Common::Vec2f tex_coord; +}; + +constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 64; + +/** + * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. 
+ * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. + * + * @param flipped Whether the frame should be flipped upside down. + */ +static std::array MakeOrthographicMatrix(float width, float height, bool flipped) { + + std::array matrix; // Laid out in column-major order + + // Last matrix row is implicitly assumed to be [0, 0, 1]. + if (flipped) { + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = 2.f / height; matrix[5] = -1.f; + // clang-format on + } else { + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // clang-format on + } + + return matrix; +} + +RendererVulkan::RendererVulkan(Frontend::EmuWindow& window) + : RendererBase{window}, instance{window}, scheduler{instance}, renderpass_cache{instance, scheduler}, + runtime{instance, scheduler, renderpass_cache}, swapchain{instance, renderpass_cache}, + vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}} { + + auto& telemetry_session = Core::System::GetInstance().TelemetrySession(); + constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA"); + telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650"); + telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.1"); + + window.mailbox = nullptr; +} + +RendererVulkan::~RendererVulkan() { + vk::Device device = instance.GetDevice(); + + device.destroyPipelineLayout(present_pipeline_layout); + device.destroyDescriptorSetLayout(present_descriptor_layout); + device.destroyDescriptorUpdateTemplate(present_update_template); + device.destroyShaderModule(present_vertex_shader); + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + device.destroyPipeline(present_pipelines[i]); + device.destroyShaderModule(present_shaders[i]); + } + for (std::size_t i = 0; i < present_samplers.size(); i++) { + device.destroySampler(present_samplers[i]); + } +} + +VideoCore::ResultStatus RendererVulkan::Init() { + CompileShaders(); + BuildLayouts(); + BuildPipelines(); + + // Create the rasterizer + rasterizer = std::make_unique(render_window, instance, scheduler, + runtime, renderpass_cache); + + return VideoCore::ResultStatus::Success; +} + +VideoCore::RasterizerInterface* RendererVulkan::Rasterizer() { + return rasterizer.get(); +} + +void RendererVulkan::ShutDown() {} + +void RendererVulkan::Sync() { + rasterizer->SyncEntireState(); +} + +void RendererVulkan::PrepareRendertarget() { + for (int i = 0; i < 3; i++) { + int fb_id = i == 2 ? 1 : 0; + const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; + + // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 + u32 lcd_color_addr = + (fb_id == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); + lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; + LCD::Regs::ColorFill color_fill = {0}; + LCD::Read(color_fill.raw, lcd_color_addr); + + if (color_fill.is_enabled) { + LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, + screen_infos[i].texture); + } else { + if (screen_infos[i].texture.width != framebuffer.width || + screen_infos[i].texture.height != framebuffer.height || + screen_infos[i].texture.format != framebuffer.color_format) { + // Reallocate texture if the framebuffer size has changed. 
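// Worked example of the column-major 3x2 layout built by MakeOrthographicMatrix above:
// the columns are {m[0], m[1]}, {m[2], m[3]}, {m[4], m[5]}, so a point transforms as
//   x' = m[0]*x + m[2]*y + m[4],   y' = m[1]*x + m[3]*y + m[5].
// Assuming the function returns std::array<float, 6>, a 400x240 layout maps (0,0) to
// (-1, 1) and (400,240) to (1, -1), i.e. the whole layout spans clip space. Sketch only:

#include <array>
#include <cassert>
#include <cmath>

static void CheckOrthographicMatrix() {
    const auto m = MakeOrthographicMatrix(400.0f, 240.0f, false);
    const float x0 = m[0] * 0.0f + m[2] * 0.0f + m[4];     // expected -1
    const float y0 = m[1] * 0.0f + m[3] * 0.0f + m[5];     // expected  1
    const float x1 = m[0] * 400.0f + m[2] * 240.0f + m[4]; // expected  1
    const float y1 = m[1] * 400.0f + m[3] * 240.0f + m[5]; // expected -1
    assert(std::abs(x0 + 1.0f) < 1e-5f && std::abs(y0 - 1.0f) < 1e-5f);
    assert(std::abs(x1 - 1.0f) < 1e-5f && std::abs(y1 + 1.0f) < 1e-5f);
}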
+ // This is expected to not happen very often and hence should not be a + // performance problem. + ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer); + } + + LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); + + // Resize the texture in case the framebuffer size has changed + screen_infos[i].texture.width = framebuffer.width; + screen_infos[i].texture.height = framebuffer.height; + } + } +} + +void RendererVulkan::BeginRendering() { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[current_pipeline]); + + for (std::size_t i = 0; i < screen_infos.size(); i++) { + runtime.Transition(command_buffer, screen_infos[i].display_texture, + vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + } + + const std::array present_textures = { + vk::DescriptorImageInfo{ + .sampler = present_samplers[current_sampler], + .imageView = screen_infos[0].display_texture.image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }, + vk::DescriptorImageInfo{ + .sampler = present_samplers[current_sampler], + .imageView = screen_infos[1].display_texture.image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }, + vk::DescriptorImageInfo{ + .sampler = present_samplers[current_sampler], + .imageView = screen_infos[2].display_texture.image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }, + }; + + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = scheduler.GetDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = &present_descriptor_layout + }; + + vk::Device device = instance.GetDevice(); + vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; + device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures.data()); + + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, + 0, 1, &set, 0, nullptr); +} + +void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye) { + + if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) + right_eye = false; + + const PAddr framebuffer_addr = + framebuffer.active_fb == 0 + ? (!right_eye ? framebuffer.address_left1 : framebuffer.address_right1) + : (!right_eye ? framebuffer.address_left2 : framebuffer.address_right2); + + LOG_TRACE(Render_Vulkan, "0x{:08x} bytes from 0x{:08x}({}x{}), fmt {:x}", + framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), + framebuffer.height.Value(), framebuffer.format); + + int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + std::size_t pixel_stride = framebuffer.stride / bpp; + + // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately + ASSERT(pixel_stride * bpp == framebuffer.stride); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default + // only allows rows to have a memory alignement of 4. 
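// BeginRendering above allocates a fresh descriptor set every frame and fills it through
// present_update_template (created further down in BuildLayouts). For reference, a sketch
// of the equivalent plain descriptor write the template replaces, reusing the set, device
// and present_textures names from BeginRendering and the designated-initializer style used
// throughout this patch:
const vk::WriteDescriptorSet write = {
    .dstSet = set,
    .dstBinding = 0, // binding 0 is an array of three combined image samplers
    .dstArrayElement = 0,
    .descriptorCount = static_cast<u32>(present_textures.size()),
    .descriptorType = vk::DescriptorType::eCombinedImageSampler,
    .pImageInfo = present_textures.data(),
};
device.updateDescriptorSets(write, {});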
+ ASSERT(pixel_stride % 4 == 0); + + if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { + ASSERT(false); + // Reset the screen info's display texture to its own permanent texture + /*screen_info.display_texture = &screen_info.texture; + screen_info.display_texcoords = Common::Rectangle(0.f, 0.f, 1.f, 1.f); + + Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); + + vk::Rect2D region{{0, 0}, {framebuffer.width, framebuffer.height}}; + std::span framebuffer_data(VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr), + screen_info.texture.GetSize()); + + screen_info.texture.Upload(0, 1, pixel_stride, region, framebuffer_data);*/ + } +} + +void RendererVulkan::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture) { + const auto color = std::array{color_r / 255.0f, color_g / 255.0f, color_b / 255.0f, 1}; + const vk::ClearColorValue clear_color = { + .float32 = color + }; + + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.clearColorImage(texture.alloc.image, vk::ImageLayout::eShaderReadOnlyOptimal, + clear_color, range); +} + +void RendererVulkan::CompileShaders() { + vk::Device device = instance.GetDevice(); + present_vertex_shader = Compile(vertex_shader, vk::ShaderStageFlagBits::eVertex, + device, ShaderOptimization::Debug); + present_shaders[0] = Compile(fragment_shader, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + present_shaders[1] = Compile(fragment_shader_anaglyph, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + present_shaders[2] = Compile(fragment_shader_interlaced, vk::ShaderStageFlagBits::eFragment, + device, ShaderOptimization::Debug); + + auto properties = instance.GetPhysicalDevice().getProperties(); + for (std::size_t i = 0; i < present_samplers.size(); i++) { + const vk::Filter filter_mode = i == 0 ? 
vk::Filter::eLinear : vk::Filter::eNearest; + const vk::SamplerCreateInfo sampler_info = { + .magFilter = filter_mode, + .minFilter = filter_mode, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = true, + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false + }; + + present_samplers[i] = device.createSampler(sampler_info); + } +} + +void RendererVulkan::BuildLayouts() { + const vk::DescriptorSetLayoutBinding present_layout_binding = { + .binding = 0, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .descriptorCount = 3, + .stageFlags = vk::ShaderStageFlagBits::eFragment + }; + + const vk::DescriptorSetLayoutCreateInfo present_layout_info = { + .bindingCount = 1, + .pBindings = &present_layout_binding + }; + + const vk::DescriptorUpdateTemplateEntry update_template_entry = { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 3, + .descriptorType = vk::DescriptorType::eCombinedImageSampler, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo) + }; + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = 1, + .pDescriptorUpdateEntries = &update_template_entry, + .descriptorSetLayout = present_descriptor_layout + }; + + vk::Device device = instance.GetDevice(); + present_descriptor_layout = device.createDescriptorSetLayout(present_layout_info); + present_update_template = device.createDescriptorUpdateTemplate(template_info); + + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .offset = 0, + .size = sizeof(PresentUniformData), + }; + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1, + .pSetLayouts = &present_descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range + }; + + present_pipeline_layout = device.createPipelineLayout(layout_info); +} + +void RendererVulkan::BuildPipelines() { + const vk::VertexInputBindingDescription binding = { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = vk::VertexInputRate::eVertex + }; + + const std::array attributes = { + vk::VertexInputAttributeDescription{ + .location = 0, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, position) + }, + vk::VertexInputAttributeDescription{ + .location = 1, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, tex_coord) + } + }; + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &binding, + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data() + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleStrip, + .primitiveRestartEnable = false + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = vk::CullModeFlagBits::eNone, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = 
vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f} + }; + + const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &placeholder_viewport, + .scissorCount = 1, + .pScissors = &placeholder_scissor, + }; + + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data() + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = false, + .depthWriteEnable = false, + .depthCompareOp = vk::CompareOp::eAlways, + .depthBoundsTestEnable = false, + .stencilTestEnable = false + }; + + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + const std::array shader_stages = { + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = present_vertex_shader, + .pName = "main" + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = present_shaders[i], + .pName = "main" + }, + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = present_pipeline_layout, + .renderPass = renderpass_cache.GetPresentRenderpass() + }; + + vk::Device device = instance.GetDevice(); + if (const auto result = device.createGraphicsPipeline({}, pipeline_info); + result.result == vk::Result::eSuccess) { + present_pipelines[i] = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Unable to build present pipelines"); + UNREACHABLE(); + } + } +} + +void RendererVulkan::ReloadSampler() { + current_sampler = !Settings::values.filter_mode; +} + +void RendererVulkan::ReloadPipeline() { + switch (Settings::values.render_3d) { + case Settings::StereoRenderOption::Anaglyph: + current_pipeline = 1; + break; + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: + current_pipeline = 2; + draw_info.reverse_interlaced = + Settings::values.render_3d == Settings::StereoRenderOption::ReverseInterlaced; + break; + default: + current_pipeline = 0; + break; + } +} + +void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer) { + texture.format = framebuffer.color_format; + texture.width = framebuffer.width; + texture.height = framebuffer.height; + texture.alloc = runtime.Allocate(framebuffer.width, framebuffer.height, + 
VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format), + VideoCore::TextureType::Texture2D); +} + +void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h) { + auto& screen_info = screen_infos[screen_id]; + const auto& texcoords = screen_info.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.top, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.right}, + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + // As this is the "DrawSingleScreenRotated" function, the output resolution dimensions have been + // swapped. If a non-rotated draw-screen function were to be added for book-mode games, those + // should probably be set to the standard (w, h, 1.0 / w, 1.0 / h) ordering. + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info.texture.width); + const float height = static_cast(screen_info.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) { + auto& screen_info = screen_infos[screen_id]; + const auto& texcoords = screen_info.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x + w, y, texcoords.top, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.left}, + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info.texture.width); + const float height = static_cast(screen_info.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, + float x, float y, float w, float h) { + const ScreenInfo& 
screen_info_l = screen_infos[screen_id_l]; + const auto& texcoords = screen_info_l.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = { + ScreenRectVertex{x, y, texcoords.bottom, texcoords.left}, + ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right}, + ScreenRectVertex{x, y + h, texcoords.top, texcoords.left}, + ScreenRectVertex{x + w, y + h, texcoords.top, texcoords.right} + }; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info_l.texture.width); + const float height = static_cast(screen_info_l.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + + draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, + float x, float y, float w, float h) { + const ScreenInfo& screen_info_l = screen_infos[screen_id_l]; + const auto& texcoords = screen_info_l.display_texcoords; + + u32 size = sizeof(ScreenRectVertex) * 4; + auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + + const std::array vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), + }}; + + std::memcpy(ptr, vertices.data(), size); + vertex_buffer.Commit(size); + + const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); + const float width = static_cast(screen_info_l.texture.width); + const float height = static_cast(screen_info_l.texture.height); + + draw_info.i_resolution = Common::Vec4f{width * scale_factor, height * scale_factor, + 1.0f / (width * scale_factor), + 1.0f / (height * scale_factor)}; + + draw_info.o_resolution = Common::Vec4f{w, h, 1.0f / w, 1.0f / h}; + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(draw_info), &draw_info); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); +} + +void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool flipped) { + if (VideoCore::g_renderer_bg_color_update_requested.exchange(false)) { + // Update background color before drawing + clear_color.float32[0] = Settings::values.bg_red; + clear_color.float32[1] = Settings::values.bg_green; + clear_color.float32[2] = Settings::values.bg_blue; + } + + if (VideoCore::g_renderer_sampler_update_requested.exchange(false)) { + // Set the new filtering mode for the sampler 
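// DrawScreens above consumes the g_renderer_*_update_requested flags with exchange(false),
// so a settings change made on another thread is applied exactly once on the render thread.
// A sketch of the producer side (hypothetical frontend hook; the flags are assumed to be
// std::atomic<bool> globals declared in video_core):
static void OnGraphicsSettingsChanged() {
    // Ask the render thread to rebuild its sampler and present pipeline next frame.
    VideoCore::g_renderer_sampler_update_requested = true;
    VideoCore::g_renderer_shader_update_requested = true;
}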
+ ReloadSampler(); + } + + if (VideoCore::g_renderer_shader_update_requested.exchange(false)) { + ReloadPipeline(); + } + + const auto& top_screen = layout.top_screen; + const auto& bottom_screen = layout.bottom_screen; + + // Set projection matrix + draw_info.modelview = + MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height), flipped); + + const bool stereo_single_screen = + Settings::values.render_3d == Settings::StereoRenderOption::Anaglyph || + Settings::values.render_3d == Settings::StereoRenderOption::Interlaced || + Settings::values.render_3d == Settings::StereoRenderOption::ReverseInterlaced; + + // Bind necessary state before drawing the screens + BeginRendering(); + + draw_info.layer = 0; + if (layout.top_screen_enabled) { + if (layout.is_rotated) { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreenRotated(0, top_screen.left, + top_screen.top, top_screen.GetWidth(), + top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreenRotated(0, (float)top_screen.left / 2, + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + (float)top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(1, + ((float)top_screen.left / 2) + ((float)layout.width / 2), + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + (float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreenRotated(0, layout.top_screen.left, + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(1, + layout.cardboard.top_screen_right_eye + + ((float)layout.width / 2), + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereoRotated(0, 1, (float)top_screen.left, (float)top_screen.top, + (float)top_screen.GetWidth(), (float)top_screen.GetHeight()); + } + } else { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreen(0, (float)top_screen.left, (float)top_screen.top, + (float)top_screen.GetWidth(), (float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreen(0, (float)top_screen.left / 2, (float)top_screen.top, + (float)top_screen.GetWidth() / 2, (float)top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(1, + ((float)top_screen.left / 2) + ((float)layout.width / 2), + (float)top_screen.top, (float)top_screen.GetWidth() / 2, + (float)top_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreen(0, layout.top_screen.left, layout.top_screen.top, + layout.top_screen.GetWidth(), layout.top_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(1, + layout.cardboard.top_screen_right_eye + ((float)layout.width / 2), + layout.top_screen.top, layout.top_screen.GetWidth(), + layout.top_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereo(0, 1, (float)top_screen.left, + (float)top_screen.top, (float)top_screen.GetWidth(), + (float)top_screen.GetHeight()); + } + } + } + + draw_info.layer = 0; + if (layout.bottom_screen_enabled) { + if (layout.is_rotated) { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreenRotated(2, (float)bottom_screen.left, + 
(float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreenRotated( + 2, (float)bottom_screen.left / 2, (float)bottom_screen.top, + (float)bottom_screen.GetWidth() / 2, (float)bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated( + 2, ((float)bottom_screen.left / 2) + ((float)layout.width / 2), + (float)bottom_screen.top, (float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreenRotated(2, layout.bottom_screen.left, + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreenRotated(2, + layout.cardboard.bottom_screen_right_eye + + ((float)layout.width / 2), + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereoRotated(2, 2, (float)bottom_screen.left, (float)bottom_screen.top, + (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } + } else { + if (Settings::values.render_3d == Settings::StereoRenderOption::Off) { + DrawSingleScreen(2, (float)bottom_screen.left, + (float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::SideBySide) { + DrawSingleScreen(2, (float)bottom_screen.left / 2, + (float)bottom_screen.top, (float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(2, + ((float)bottom_screen.left / 2) + ((float)layout.width / 2), + (float)bottom_screen.top, (float)bottom_screen.GetWidth() / 2, + (float)bottom_screen.GetHeight()); + } else if (Settings::values.render_3d == Settings::StereoRenderOption::CardboardVR) { + DrawSingleScreen(2, layout.bottom_screen.left, + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + draw_info.layer = 1; + DrawSingleScreen(2, + layout.cardboard.bottom_screen_right_eye + + ((float)layout.width / 2), + layout.bottom_screen.top, layout.bottom_screen.GetWidth(), + layout.bottom_screen.GetHeight()); + } else if (stereo_single_screen) { + DrawSingleScreenStereo(2, 2, (float)bottom_screen.left, + (float)bottom_screen.top, (float)bottom_screen.GetWidth(), + (float)bottom_screen.GetHeight()); + } + } + } +} + +void RendererVulkan::SwapBuffers() { + const auto& layout = render_window.GetFramebufferLayout(); + PrepareRendertarget(); + + // Create swapchain if needed + if (swapchain.NeedsRecreation()) { + swapchain.Create(layout.width, layout.height, false); + } + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = static_cast(layout.width), + .height = static_cast(layout.height), + .minDepth = 0.0f, + .maxDepth = 1.0f + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {layout.width, layout.height} + }; + + const vk::ClearValue clear_value = { + .color = clear_color + }; + + const vk::RenderPassBeginInfo begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = swapchain.GetFramebuffer(), + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setViewport(0, viewport); + command_buffer.setScissor(0, 
scissor); + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + + DrawScreens(layout, false); + + // Flush all buffers to make the data visible to the GPU before submitting + vertex_buffer.Flush(); + rasterizer->FlushBuffers(); + + command_buffer.endRenderPass(); + scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore()); + + // Inform texture runtime about the switch + runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex()); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..c0f3c128c --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,128 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "common/math_util.h" +#include "core/hw/gpu.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Vulkan { + +/// Structure used for storing information about the textures for each 3DS screen +struct TextureInfo { + ImageAlloc alloc; + u32 width; + u32 height; + GPU::Regs::PixelFormat format; +}; + +/// Structure used for storing information about the display target for each 3DS screen +struct ScreenInfo { + ImageAlloc display_texture; + Common::Rectangle display_texcoords; + TextureInfo texture; + vk::Sampler sampler; +}; + +// Uniform data used for presenting the 3DS screens +struct PresentUniformData { + std::array modelview; + Common::Vec4f i_resolution; + Common::Vec4f o_resolution; + int screen_id_l = 0; + int screen_id_r = 0; + int layer = 0; + int reverse_interlaced = 0; + + // Returns an immutable byte view of the uniform data + auto AsBytes() const { + return std::as_bytes(std::span{this, 1}); + } +}; + +static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!"); + +constexpr u32 PRESENT_PIPELINES = 3; + +class RasterizerVulkan; + +class RendererVulkan : public RendererBase { +public: + RendererVulkan(Frontend::EmuWindow& window); + ~RendererVulkan() override; + + VideoCore::ResultStatus Init() override; + VideoCore::RasterizerInterface* Rasterizer() override; + void ShutDown() override; + void SwapBuffers() override; + void TryPresent(int timeout_ms) override {} + void PrepareVideoDumping() override {} + void CleanupVideoDumping() override {} + void Sync() override; + +private: + void ReloadSampler(); + void ReloadPipeline(); + void CompileShaders(); + void BuildLayouts(); + void BuildPipelines(); + void ConfigureRenderPipeline(); + void PrepareRendertarget(); + void BeginRendering(); + void ConfigureFramebufferTexture(TextureInfo& texture, const GPU::Regs::FramebufferConfig& framebuffer); + + void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped); + void DrawSingleScreenRotated(u32 screen_id, float x, float y, float w, float h); + void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h); + void DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, float h); + void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float 
w, float h); + + void UpdateFramerate(); + + /// Loads framebuffer from emulated memory into the display information structure + void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye); + + /// Fills active OpenGL texture with the given RGB color. + void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); + +private: + Instance instance; + TaskScheduler scheduler; + RenderpassCache renderpass_cache; + TextureRuntime runtime; + Swapchain swapchain; + std::unique_ptr rasterizer; + StreamBuffer vertex_buffer; + + // Present pipelines (Normal, Anaglyph, Interlaced) + vk::PipelineLayout present_pipeline_layout; + vk::DescriptorSetLayout present_descriptor_layout; + vk::DescriptorUpdateTemplate present_update_template; + std::array present_pipelines; + std::array present_descriptor_sets; + std::array present_shaders; + std::array present_samplers; + vk::ShaderModule present_vertex_shader; + u32 current_pipeline = 0; + u32 current_sampler = 0; + + /// Display information for top and bottom screens respectively + std::array screen_infos; + PresentUniformData draw_info{}; + vk::ClearColorValue clear_color{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4b3d9a33d..74975845c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -139,6 +139,8 @@ PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, descriptor_dirty.fill(true); LoadDiskCache(); + trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex, + instance.GetDevice(), ShaderOptimization::Debug); } PipelineCache::~PipelineCache() { @@ -152,6 +154,18 @@ PipelineCache::~PipelineCache() { device.destroyDescriptorUpdateTemplate(update_templates[i]); } + for (auto& [key, module] : programmable_vertex_shaders.shader_cache) { + device.destroyShaderModule(module); + } + + for (auto& [key, module] : fixed_geometry_shaders.shaders) { + device.destroyShaderModule(module); + } + + for (auto& [key, module] : fragment_shaders.shaders) { + device.destroyShaderModule(module); + } + for (const auto& [hash, pipeline] : graphics_pipelines) { device.destroyPipeline(pipeline); } @@ -224,7 +238,7 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { shader_hashes[ProgramType::FS] = config.Hash(); } -void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_view) { +void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { const DescriptorData data = { .image_info = vk::DescriptorImageInfo{ .imageView = image_view, @@ -232,10 +246,21 @@ void PipelineCache::BindTexture(u32 set, u32 descriptor, vk::ImageView image_vie } }; - SetBinding(set, descriptor, data); + SetBinding(1, binding, data); } -void PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 offset, u32 size) { +void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { + const DescriptorData data = { + .image_info = vk::DescriptorImageInfo{ + .imageView = image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + } + }; + + SetBinding(3, binding, data); +} + +void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { const DescriptorData data = { .buffer_info = vk::DescriptorBufferInfo{ .buffer = buffer, @@ -244,25 +269,25 @@ void 
PipelineCache::BindBuffer(u32 set, u32 descriptor, vk::Buffer buffer, u32 o } }; - SetBinding(set, descriptor, data); + SetBinding(0, binding, data); } -void PipelineCache::BindTexelBuffer(u32 set, u32 descriptor, vk::BufferView buffer_view) { +void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { const DescriptorData data = { .buffer_view = buffer_view }; - SetBinding(set, descriptor, data); + SetBinding(0, binding, data); } -void PipelineCache::BindSampler(u32 set, u32 descriptor, vk::Sampler sampler) { +void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) { const DescriptorData data = { .image_info = vk::DescriptorImageInfo{ .sampler = sampler } }; - SetBinding(set, descriptor, data); + SetBinding(2, binding, data); } void PipelineCache::SetViewport(float x, float y, float width, float height) { @@ -454,13 +479,25 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f} }; - const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = 1.0f, + .height = 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {1, 1} + }; + const vk::PipelineViewportStateCreateInfo viewport_info = { .viewportCount = 1, - .pViewports = &placeholder_viewport, + .pViewports = &viewport, .scissorCount = 1, - .pScissors = &placeholder_scissor, + .pScissors = &scissor, }; const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported(); @@ -483,7 +520,8 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { }; const vk::PipelineDynamicStateCreateInfo dynamic_info = { - .dynamicStateCount = extended_dynamic_states ? 14u : 6u, + .dynamicStateCount = + extended_dynamic_states ? 
static_cast(dynamic_states.size()) : 6u, .pDynamicStates = dynamic_states.data() }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index be85f3cfe..0b75abc3b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -106,6 +106,16 @@ struct PipelineInfo { VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8; RasterizationState rasterization{}; DepthStencilState depth_stencil{}; + + bool IsDepthWriteEnabled() const { + const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8; + const bool depth_write = + depth_stencil.depth_test_enable && depth_stencil.depth_write_enable; + const bool stencil_write = + has_stencil && depth_stencil.stencil_test_enable && depth_stencil.stencil_write_mask != 0; + + return depth_write || stencil_write; + } }; union DescriptorData { @@ -164,17 +174,20 @@ public: /// Binds a fragment shader generated from PICA state void UseFragmentShader(const Pica::Regs& regs); - /// Binds a texture to the specified descriptor - void BindTexture(u32 set, u32 binding, vk::ImageView view); + /// Binds a texture to the specified binding + void BindTexture(u32 binding, vk::ImageView image_view); - /// Binds a buffer to the specified descriptor - void BindBuffer(u32 set, u32 binding, vk::Buffer buffer, u32 offset, u32 size); + /// Binds a storage image to the specified binding + void BindStorageImage(u32 binding, vk::ImageView image_view); - /// Binds a buffer to the specified descriptor - void BindTexelBuffer(u32 set, u32 binding, vk::BufferView buffer_view); + /// Binds a buffer to the specified binding + void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size); - /// Binds a sampler to the specified descriptor - void BindSampler(u32 set, u32 binding, vk::Sampler sampler); + /// Binds a buffer to the specified binding + void BindTexelBuffer(u32 binding, vk::BufferView buffer_view); + + /// Binds a sampler to the specified binding + void BindSampler(u32 binding, vk::Sampler sampler); /// Sets the viewport rectangle to the provided values void SetViewport(float x, float y, float width, float height); @@ -185,6 +198,10 @@ public: /// Marks all descriptor sets as dirty void MarkDescriptorSetsDirty(); + vk::ImageView GetTexture(u32 set, u32 binding) const { + return update_data[set][binding].image_info.imageView; + } + private: /// Binds a resource to the provided binding void SetBinding(u32 set, u32 binding, DescriptorData data); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..0d698c8b9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,2128 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/video_core.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); +MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); + +RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { + position[0] = v.pos.x.ToFloat32(); + position[1] = v.pos.y.ToFloat32(); + position[2] = v.pos.z.ToFloat32(); + position[3] = v.pos.w.ToFloat32(); + color[0] = v.color.x.ToFloat32(); + color[1] = v.color.y.ToFloat32(); + color[2] = v.color.z.ToFloat32(); + color[3] = v.color.w.ToFloat32(); + tex_coord0[0] = v.tc0.x.ToFloat32(); + tex_coord0[1] = v.tc0.y.ToFloat32(); + tex_coord1[0] = v.tc1.x.ToFloat32(); + tex_coord1[1] = v.tc1.y.ToFloat32(); + tex_coord2[0] = v.tc2.x.ToFloat32(); + tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + normquat = -normquat; + } +} + +/** + * This maps to the following layout in GLSL code: + * layout(location = 0) in vec4 vert_position; + * layout(location = 1) in vec4 vert_color; + * layout(location = 2) in vec2 vert_texcoord0; + * layout(location = 3) in vec2 vert_texcoord1; + * layout(location = 4) in vec2 vert_texcoord2; + * layout(location = 5) in float vert_texcoord0_w; + * layout(location = 6) in vec4 vert_normquat; + * layout(location = 7) in vec3 vert_view; + */ +constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() { + VertexLayout layout{}; + layout.attribute_count = 8; + layout.binding_count = 1; + + // Define binding + layout.bindings[0].binding.Assign(0); + layout.bindings[0].fixed.Assign(0); + layout.bindings[0].stride.Assign(sizeof(HardwareVertex)); + + // Define attributes + constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; + u32 offset = 0; + + for (u32 loc = 0; loc < 8; loc++) { + VertexAttribute& attribute = layout.attributes[loc]; + attribute.binding.Assign(0); + attribute.location.Assign(loc); + attribute.offset.Assign(offset); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(sizes[loc]); + offset += sizes[loc] * sizeof(float); + } + + return layout; +} + +constexpr u32 VERTEX_BUFFER_SIZE = 16 * 1024 * 1024; +constexpr u32 INDEX_BUFFER_SIZE = 2 * 1024 * 1024; +constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; +constexpr u32 TEXTURE_BUFFER_SIZE = 2 * 1024 * 
1024; + +constexpr std::array TEXTURE_BUFFER_LF_FORMATS = { + vk::Format::eR32G32Sfloat +}; + +constexpr std::array TEXTURE_BUFFER_FORMATS = { + vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32A32Sfloat +}; + +RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, + TaskScheduler& scheduler, TextureRuntime& runtime, + RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, runtime{runtime}, renderpass_cache{renderpass_cache}, + res_cache{*this, runtime}, pipeline_cache{instance, scheduler, renderpass_cache}, + vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, + uniform_buffer{instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}}, + index_buffer{instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}}, + texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TEXTURE_BUFFER_FORMATS}, + texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TEXTURE_BUFFER_LF_FORMATS} { + + // Create a 1x1 clear texture to use in the NULL case. + default_texture = runtime.Allocate(1, 1, VideoCore::PixelFormat::RGBA8, + VideoCore::TextureType::Texture2D); + runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture, + vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + + uniform_block_data.lighting_lut_dirty.fill(true); + + uniform_buffer_alignment = instance.UniformMinAlignment(); + uniform_size_aligned_vs = + Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = + Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + + // Define vertex layout for software shaders + pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout(); + + const SamplerInfo default_sampler_info = { + .mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .min_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .mip_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, + .wrap_s = Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder, + .wrap_t = Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder + }; + + default_sampler = CreateSampler(default_sampler_info); + + // Since we don't have access to VK_EXT_descriptor_indexing we need to initialize + all descriptor sets even the ones we don't use.
Use default_texture for this + for (u32 i = 0; i < 4; i++) { + pipeline_cache.BindTexture(i, default_texture.image_view); + pipeline_cache.BindSampler(i, default_sampler); + } + + for (u32 i = 0; i < 7; i++) { + pipeline_cache.BindStorageImage(i, default_texture.image_view); + } + + // Explicitly call the derived version to avoid warnings about calling virtual + // methods in the constructor + RasterizerVulkan::SyncEntireState(); +} + +RasterizerVulkan::~RasterizerVulkan() { + VmaAllocator allocator = instance.GetAllocator(); + vk::Device device = instance.GetDevice(); + device.waitIdle(); + + for (auto& [key, sampler] : samplers) { + device.destroySampler(sampler); + } + + for (auto& [key, framebuffer] : framebuffers) { + device.destroyFramebuffer(framebuffer); + } + + vmaDestroyImage(allocator, default_texture.image, default_texture.allocation); + device.destroyImageView(default_texture.image_view); + device.destroySampler(default_sampler); +} + +void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + //shader_program_manager->LoadDiskCache(stop_loading, callback); +} + +void RasterizerVulkan::SyncEntireState() { + // Sync fixed function Vulkan state + SyncClipEnabled(); + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); + SyncLogicOp(); + SyncStencilTest(); + SyncDepthTest(); + SyncColorWriteMask(); + SyncStencilWriteMask(); + SyncDepthWriteMask(); + + // Sync uniforms + SyncClipCoef(); + SyncDepthScale(); + SyncDepthOffset(); + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages(); + for (std::size_t index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (unsigned light_index = 0; light_index < 8; light_index++) { + SyncLightSpecular0(light_index); + SyncLightSpecular1(light_index); + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + SyncLightDistanceAttenuationBias(light_index); + SyncLightDistanceAttenuationScale(light_index); + } + + SyncFogColor(); + SyncProcTexNoise(); + SyncProcTexBias(); + SyncShadowBias(); + SyncShadowTextureBias(); +} + +/** + * This is a helper function to resolve an issue when interpolating opposite quaternions. See below + * for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(Q1, -Q2) positive. + * + * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is + * correct for most cases but can still rotate around the long way sometimes. An implementation + * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check + * between each step would work for those cases at the cost of being more complex to implement. + * + * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around + * these issues, making this basic implementation actually more accurate to the hardware. 
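+ * + * For example, Q = (0, 0, 0, 1) and -Q = (0, 0, 0, -1) describe the same rotation, yet + * Dot(Q, -Q) = -1 < 0, so AreQuaternionsOpposite() reports them as opposite and the second + * quaternion is negated (via the flip_quaternion path in HardwareVertex) before interpolation.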
+ */ +static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { + Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; + Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; + + return (Common::Dot(a, b) < 0.f); +} + +void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) { + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); +} + +static constexpr std::array vs_attrib_types = { + AttribType::Byte, // VertexAttributeFormat::BYTE + AttribType::Ubyte, // VertexAttributeFormat::UBYTE + AttribType::Short, // VertexAttributeFormat::SHORT + AttribType::Float // VertexAttributeFormat::FLOAT +}; + +struct VertexArrayInfo { + u32 vs_input_index_min; + u32 vs_input_index_max; + u32 vs_input_size; +}; + +RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + + u32 vertex_min; + u32 vertex_max; + if (is_indexed) { + const auto& index_info = regs.pipeline.index_array; + const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; + const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); + const u16* index_address_16 = reinterpret_cast(index_address_8); + const bool index_u16 = index_info.format != 0; + + vertex_min = 0xFFFF; + vertex_max = 0; + const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + res_cache.FlushRegion(address, size, nullptr); + for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { + const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; + vertex_min = std::min(vertex_min, vertex); + vertex_max = std::max(vertex_max, vertex); + } + } else { + vertex_min = regs.pipeline.vertex_offset; + vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; + } + + const u32 vertex_num = vertex_max - vertex_min + 1; + u32 vs_input_size = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count != 0) { + vs_input_size += loader.byte_count * vertex_num; + } + } + + return {vertex_min, vertex_max, vs_input_size}; +} + +void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max) { + auto [array_ptr, array_offset, _] = vertex_buffer.Map(vs_input_size, 4); + + /** + * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU + * how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base + * address containing the vertex array data. The data for each attribute loader (i) can be found + * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought + * as something analogous to Vulkan bindings. The user can store attributes in separate loaders + * or interleave them in the same loader. 
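+ * + * As a purely hypothetical illustration: a single loader with byte_count = 20 that interleaves + * a 3-component float position (12 bytes) followed by a 2-component float texture coordinate + * (8 bytes) maps to one Vulkan binding with stride 20 and two attributes, while splitting the + * same data across two loaders would instead produce two bindings.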
+ */ + const auto& regs = Pica::g_state.regs; + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE + + VertexLayout layout{}; + std::array enable_attributes{}; + std::array binding_offsets{}; + + u32 buffer_offset = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count == 0 || loader.byte_count == 0) { + continue; + } + + // Analyze the attribute loader by checking which attributes it provides + u32 offset = 0; + for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) { + u32 attribute_index = loader.GetComponent(comp); + if (attribute_index < 12) { + if (u32 size = vertex_attributes.GetNumElements(attribute_index); size != 0) { + offset = Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); + + const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); + const u32 attrib_format = static_cast(vertex_attributes.GetFormat(attribute_index)); + const AttribType type = vs_attrib_types[attrib_format]; + + // Define the attribute + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(input_reg); + attribute.offset.Assign(offset); + attribute.type.Assign(type); + attribute.size.Assign(size); + + enable_attributes[input_reg] = true; + offset += vertex_attributes.GetStride(attribute_index); + } + + } else { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings respectively + offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + } + } + + const PAddr data_addr = base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); + const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; + const u32 data_size = loader.byte_count * vertex_num; + + res_cache.FlushRegion(data_addr, data_size, nullptr); + std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size); + + // Create the binding associated with this loader + VertexBinding& binding = layout.bindings.at(layout.binding_count); + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(0); + binding.stride.Assign(loader.byte_count); + + // Keep track of the binding offsets so we can bind the vertex buffer later + binding_offsets[layout.binding_count++] = array_offset + buffer_offset; + array_ptr += data_size; + buffer_offset += data_size; + } + + // Reserve the last binding for fixed attributes + u32 offset = 0; + for (std::size_t i = 0; i < 16; i++) { + if (vertex_attributes.IsDefaultAttribute(i)) { + const u32 reg = regs.vs.GetRegisterForAttribute(i); + if (!enable_attributes[reg]) { + const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const std::array data = { + attr.x.ToFloat32(), + attr.y.ToFloat32(), + attr.z.ToFloat32(), + attr.w.ToFloat32() + }; + + // Copy the data to the end of the buffer + const u32 data_size = sizeof(float) * static_cast(data.size()); + std::memcpy(array_ptr, data.data(), data_size); + + // Define the binding. 
Note that the counter is not incremented + VertexBinding& binding = layout.bindings.at(layout.binding_count); + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(1); + binding.stride.Assign(offset); + + VertexAttribute& attribute = layout.attributes.at(layout.attribute_count++); + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(reg); + attribute.offset.Assign(offset); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(4); + + offset += data_size; + array_ptr += data_size; + binding_offsets[layout.binding_count] = array_offset + buffer_offset; + } + } + } + + pipeline_info.vertex_layout = layout; + vertex_buffer.Commit(vs_input_size); + + std::array buffers; + buffers.fill(vertex_buffer.GetHandle()); + + // Bind the vertex buffers with all the bindings + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindVertexBuffers(0, layout.binding_count, buffers.data(), binding_offsets.data()); +} + +bool RasterizerVulkan::SetupVertexShader() { + MICROPROFILE_SCOPE(OpenGL_VS); + return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs); +} + +bool RasterizerVulkan::SetupGeometryShader() { + MICROPROFILE_SCOPE(OpenGL_GS); + const auto& regs = Pica::g_state.regs; + + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); + return false; + } + + pipeline_cache.UseFixedGeometryShader(regs); + return true; +} + +bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { + return false; + } + if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { + return false; + } + } + + if (!SetupVertexShader()) { + return false; + } + + if (!SetupGeometryShader()) { + return false; + } + + return Draw(true, is_indexed); +} + +bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + + auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed); + + if (vs_input_size > VERTEX_BUFFER_SIZE) { + LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size); + return false; + } + + SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max); + pipeline_cache.BindPipeline(pipeline_info); + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + if (is_indexed) { + bool index_u16 = regs.pipeline.index_array.format != 0; + const u64 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + + if (index_buffer_size > INDEX_BUFFER_SIZE) { + LOG_WARNING(Render_Vulkan, "Too large index input size {}", index_buffer_size); + return false; + } + + const u8* index_data = VideoCore::g_memory->GetPhysicalPointer( + regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + + regs.pipeline.index_array.offset); + + // Upload index buffer data to the GPU + auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size, 4); + std::memcpy(index_ptr, index_data, index_buffer_size); + index_buffer.Commit(index_buffer_size); + + vk::IndexType index_type = index_u16 ? 
vk::IndexType::eUint16 : vk::IndexType::eUint8EXT; + command_buffer.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type); + + // Submit draw + command_buffer.drawIndexed(regs.pipeline.num_vertices, 1, 0, 0, 0); + } else { + command_buffer.draw(regs.pipeline.num_vertices, 1, 0, 0); + } + + return true; +} + +void RasterizerVulkan::DrawTriangles() { + if (vertex_batch.empty()) { + return; + } + + Draw(false, false); +} + +bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { + MICROPROFILE_SCOPE(OpenGL_Drawing); + const auto& regs = Pica::g_state.regs; + + pipeline_info.color_attachment = + VideoCore::PixelFormatFromColorFormat(regs.framebuffer.framebuffer.color_format); + pipeline_info.depth_attachment = + VideoCore::PixelFormatFromDepthFormat(regs.framebuffer.framebuffer.depth_format); + + const bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow; + const bool has_stencil = + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask.Value(); + const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled(); + const bool using_color_fb = + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + const bool using_depth_fb = + !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && pipeline_info.depth_stencil.stencil_test_enable)); + + const auto viewport_rect_unscaled = Common::Rectangle{ + // These registers hold half-width and half-height, so must be multiplied by 2 + regs.rasterizer.viewport_corner.x, // left + regs.rasterizer.viewport_corner.y + // top + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.x + // right + static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * + 2), + regs.rasterizer.viewport_corner.y // bottom + }; + + auto [color_surface, depth_surface, surfaces_rect] = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + const VideoCore::Rect2D draw_rect = { + static_cast(std::clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(std::clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast(std::clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top)) + }; + + auto valid_surface = color_surface ? color_surface : depth_surface; + const FramebufferInfo framebuffer_info = { + .color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE, + .depth = depth_surface ? 
depth_surface->alloc.image_view : VK_NULL_HANDLE, + .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .width = valid_surface->GetScaledWidth(), + .height = valid_surface->GetScaledHeight() + }; + + auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{}); + if (new_framebuffer) { + it->second = CreateFramebuffer(framebuffer_info); + } + + ImageAlloc color_alloc = + color_surface ? color_surface->alloc : ImageAlloc{}; + ImageAlloc depth_alloc = + depth_surface ? depth_surface->alloc : ImageAlloc{}; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + runtime.Transition(command_buffer, color_alloc, + vk::ImageLayout::eColorAttachmentOptimal, 0, color_alloc.levels); + runtime.Transition(command_buffer, depth_alloc, + vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, depth_alloc.levels); + + const vk::RenderPassBeginInfo renderpass_begin = { + .renderPass = + renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .framebuffer = it->second, + .renderArea = vk::Rect2D{ + .offset = {static_cast(draw_rect.left), static_cast(draw_rect.bottom)}, + .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()} + }, + + .clearValueCount = 0, + .pClearValues = nullptr + }; + + renderpass_cache.EnterRenderpass(renderpass_begin); + + // Sync the viewport + pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, + surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, + viewport_rect_unscaled.GetWidth() * res_scale, + viewport_rect_unscaled.GetHeight() * res_scale); + + if (uniform_block_data.data.framebuffer_scale != res_scale) { + uniform_block_data.data.framebuffer_scale = res_scale; + uniform_block_data.dirty = true; + } + + // Scissor checks are window-, not viewport-relative, which means that if the cached texture + // sub-rect changes, the scissor bounds also need to be updated. + int scissor_x1 = static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); + int scissor_y1 = static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); + + // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when + // scaling or doing multisampling. 
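+ // For example (illustrative numbers): with res_scale = 2 and scissor_test.x2 = 319, the bound + // becomes surfaces_rect.left + (319 + 1) * 2, so upscaled pixels 638 and 639 of the last PICA + // column stay covered instead of leaving a one-pixel seam.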
+ int scissor_x2 = static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); + int scissor_y2 = static_cast(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale); + + if (uniform_block_data.data.scissor_x1 != scissor_x1 || + uniform_block_data.data.scissor_x2 != scissor_x2 || + uniform_block_data.data.scissor_y1 != scissor_y1 || + uniform_block_data.data.scissor_y2 != scissor_y2) { + + uniform_block_data.data.scissor_x1 = scissor_x1; + uniform_block_data.data.scissor_x2 = scissor_x2; + uniform_block_data.data.scissor_y1 = scissor_y1; + uniform_block_data.data.scissor_y2 = scissor_y2; + uniform_block_data.dirty = true; + } + + /*bool need_duplicate_texture = false; + auto CheckBarrier = [&need_duplicate_texture, &color_surface](vk::ImageView handle) { + if (color_surface && color_surface->alloc.image_view == handle) { + need_duplicate_texture = true; + } + };*/ + + auto CheckBarrier = [this, &color_surface = color_surface](vk::ImageView image_view, u32 texture_index) { + if (color_surface && color_surface->alloc.image_view == image_view) { + //auto temp_tex = backend->CreateTexture(texture->GetInfo()); + //temp_tex->CopyFrom(texture); + pipeline_cache.BindTexture(texture_index, image_view); + } else { + pipeline_cache.BindTexture(texture_index, image_view); + } + }; + + // Sync and bind the texture surfaces + const auto pica_textures = regs.texturing.GetTextures(); + for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + const auto& texture = pica_textures[texture_index]; + + if (texture.enabled) { + /*if (texture_index == 0) { + using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; + switch (texture.config.type.Value()) { + case TextureType::Shadow2D: { + if (!allow_shadow) + continue; + + Surface surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); + } else { + state.image_shadow_texture_px = 0; + } + continue; + } + case TextureType::ShadowCube: { + if (!allow_shadow) + continue; + Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister( + texture.config, texture.format); + Surface surface; + + using CubeFace = Pica::TexturingRegs::CubeFace; + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); + } else { + state.image_shadow_texture_px = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_nx = surface->texture.handle); + } else { + state.image_shadow_texture_nx = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_py = surface->texture.handle); + } else { + state.image_shadow_texture_py = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_ny = surface->texture.handle); + } else { + state.image_shadow_texture_ny = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); + 
surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_pz = surface->texture.handle); + } else { + state.image_shadow_texture_pz = 0; + } + + info.physical_address = + regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); + surface = res_cache.GetTextureSurface(info); + if (surface != nullptr) { + CheckBarrier(state.image_shadow_texture_nz = surface->texture.handle); + } else { + state.image_shadow_texture_nz = 0; + } + + continue; + } + case TextureType::TextureCube: + using CubeFace = Pica::TexturingRegs::CubeFace; + TextureCubeConfig config; + config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); + config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); + config.py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); + config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); + config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); + config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); + config.width = texture.config.width; + config.format = texture.format; + state.texture_cube_unit.texture_cube = + res_cache.GetTextureCube(config).texture.handle; + + texture_cube_sampler.SyncWithConfig(texture.config); + state.texture_units[texture_index].texture_2d = 0; + continue; // Texture unit 0 setup finished. Continue to next unit + default: + state.texture_cube_unit.texture_cube = 0; + } + }*/ + + //texture_samplers[texture_index].SyncWithConfig(texture.config); + + // Update sampler key + texture_samplers[texture_index] = SamplerInfo{ + .mag_filter = texture.config.mag_filter, + .min_filter = texture.config.min_filter, + .mip_filter = texture.config.mip_filter, + .wrap_s = texture.config.wrap_s, + .wrap_t = texture.config.wrap_t, + .border_color = texture.config.border_color.raw, + .lod_min = texture.config.lod.min_level, + .lod_max = texture.config.lod.max_level, + .lod_bias = texture.config.lod.bias + }; + + // Search the cache and bind the appropriate sampler + const SamplerInfo& key = texture_samplers[texture_index]; + if (auto it = samplers.find(key); it != samplers.end()) { + pipeline_cache.BindSampler(texture_index, it->second); + } else { + vk::Sampler texture_sampler = CreateSampler(key); + samplers.emplace(key, texture_sampler); + pipeline_cache.BindSampler(texture_index, texture_sampler); + } + + auto surface = res_cache.GetTextureSurface(texture); + if (surface != nullptr) { + runtime.Transition(command_buffer, surface->alloc, + vk::ImageLayout::eShaderReadOnlyOptimal, 0, surface->alloc.levels); + CheckBarrier(surface->alloc.image_view, texture_index); + } else { + // Can occur when texture addr is null or its memory is unmapped/invalid + // HACK: In this case, the correct behaviour for the PICA is to use the last + // rendered colour. But because this would be impractical to implement, the + // next best alternative is to use a clear texture, essentially skipping + // the geometry in question. + // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn + // on the male character's face, which in the OpenGL default appear black. 
+ //state.texture_units[texture_index].texture_2d = default_texture; + pipeline_cache.BindTexture(texture_index, default_texture.image_view); + } + } else { + pipeline_cache.BindTexture(texture_index, default_texture.image_view); + pipeline_cache.BindSampler(texture_index, default_sampler); + } + } + + // Sync and bind the shader + if (shader_dirty) { + SetShader(); + shader_dirty = false; + } + + // Sync the LUTs within the texture buffer + SyncAndUploadLUTs(); + SyncAndUploadLUTsLF(); + + // Sync the uniform data + UploadUniforms(accelerate); + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. + // Enable scissor test to prevent drawing outside of the framebuffer region + pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(), draw_rect.GetHeight()); + + // Draw the vertex batch + bool succeeded = true; + if (accelerate) { + succeeded = AccelerateDrawBatchInternal(is_indexed); + } else { + pipeline_cache.UseTrivialVertexShader(); + pipeline_cache.UseTrivialGeometryShader(); + + // Bind the vertex buffer at the current mapped offset. This effectively means + // that when base_vertex is zero the GPU will start drawing from the current mapped + // offset not the start of the buffer. + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), vertex_buffer.GetBufferOffset()); + + const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); + const u32 batch_size = static_cast(vertex_batch.size()); + for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { + const u32 vertices = std::min(max_vertices, batch_size - base_vertex); + const u32 vertex_size = vertices * sizeof(HardwareVertex); + + // Copy vertex data + auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); + std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); + vertex_buffer.Commit(vertex_size); + + pipeline_cache.BindPipeline(pipeline_info); + command_buffer.draw(vertices, 1, base_vertex, 0); + } + } + + vertex_batch.clear(); + + // Mark framebuffer surfaces as dirty + const VideoCore::Rect2D draw_rect_unscaled = { + draw_rect.left / res_scale, + draw_rect.top / res_scale, + draw_rect.right / res_scale, + draw_rect.bottom / res_scale + }; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } + + return succeeded; +} + +void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) { + const auto& regs = Pica::g_state.regs; + + switch (id) { + // Culling + case PICA_REG_INDEX(rasterizer.cull_mode): + SyncCullMode(); + break; + + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + case PICA_REG_INDEX(rasterizer.clip_coef[0]): + case PICA_REG_INDEX(rasterizer.clip_coef[1]): + case PICA_REG_INDEX(rasterizer.clip_coef[2]): + case PICA_REG_INDEX(rasterizer.clip_coef[3]): + SyncClipCoef(); + break; + + // Depth modifiers + case PICA_REG_INDEX(rasterizer.viewport_depth_range): + SyncDepthScale(); + break; + case 
PICA_REG_INDEX(rasterizer.viewport_depth_near_plane): + SyncDepthOffset(); + break; + + // Depth buffering + case PICA_REG_INDEX(rasterizer.depthmap_enable): + shader_dirty = true; + break; + + // Blending + case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): + SyncBlendEnabled(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): + SyncBlendFuncs(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.blend_const): + SyncBlendColor(); + break; + + // Shadow texture + case PICA_REG_INDEX(texturing.shadow): + SyncShadowTextureBias(); + break; + + // Fog state + case PICA_REG_INDEX(texturing.fog_color): + SyncFogColor(); + break; + case PICA_REG_INDEX(texturing.fog_lut_data[0]): + case PICA_REG_INDEX(texturing.fog_lut_data[1]): + case PICA_REG_INDEX(texturing.fog_lut_data[2]): + case PICA_REG_INDEX(texturing.fog_lut_data[3]): + case PICA_REG_INDEX(texturing.fog_lut_data[4]): + case PICA_REG_INDEX(texturing.fog_lut_data[5]): + case PICA_REG_INDEX(texturing.fog_lut_data[6]): + case PICA_REG_INDEX(texturing.fog_lut_data[7]): + uniform_block_data.fog_lut_dirty = true; + break; + + // ProcTex state + case PICA_REG_INDEX(texturing.proctex): + case PICA_REG_INDEX(texturing.proctex_lut): + case PICA_REG_INDEX(texturing.proctex_lut_offset): + SyncProcTexBias(); + shader_dirty = true; + break; + + case PICA_REG_INDEX(texturing.proctex_noise_u): + case PICA_REG_INDEX(texturing.proctex_noise_v): + case PICA_REG_INDEX(texturing.proctex_noise_frequency): + SyncProcTexNoise(); + break; + + case PICA_REG_INDEX(texturing.proctex_lut_data[0]): + case PICA_REG_INDEX(texturing.proctex_lut_data[1]): + case PICA_REG_INDEX(texturing.proctex_lut_data[2]): + case PICA_REG_INDEX(texturing.proctex_lut_data[3]): + case PICA_REG_INDEX(texturing.proctex_lut_data[4]): + case PICA_REG_INDEX(texturing.proctex_lut_data[5]): + case PICA_REG_INDEX(texturing.proctex_lut_data[6]): + case PICA_REG_INDEX(texturing.proctex_lut_data[7]): + using Pica::TexturingRegs; + switch (regs.texturing.proctex_lut_config.ref_table.Value()) { + case TexturingRegs::ProcTexLutTable::Noise: + uniform_block_data.proctex_noise_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorMap: + uniform_block_data.proctex_color_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::AlphaMap: + uniform_block_data.proctex_alpha_map_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::Color: + uniform_block_data.proctex_lut_dirty = true; + break; + case TexturingRegs::ProcTexLutTable::ColorDiff: + uniform_block_data.proctex_diff_lut_dirty = true; + break; + } + break; + + // Alpha test + case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): + SyncAlphaTest(); + shader_dirty = true; + break; + + // Sync GL stencil test + stencil write mask + // (Pica stencil test function register also contains a stencil write mask) + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): + SyncStencilTest(); + SyncStencilWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): + case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): + SyncStencilTest(); + break; + + // Sync GL depth test + depth and color write mask + // (Pica depth test function register also contains a depth and color write mask) + case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): + SyncDepthTest(); + SyncDepthWriteMask(); + SyncColorWriteMask(); + break; + + // Sync GL depth and stencil write mask + // (This is a dedicated combined depth / stencil 
write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): + SyncDepthWriteMask(); + SyncStencilWriteMask(); + break; + + // Sync GL color write mask + // (This is a dedicated color write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): + SyncColorWriteMask(); + break; + + case PICA_REG_INDEX(framebuffer.shadow): + SyncShadowBias(); + break; + + // Scissor test + case PICA_REG_INDEX(rasterizer.scissor_test.mode): + shader_dirty = true; + break; + + // Logic op + case PICA_REG_INDEX(framebuffer.output_merger.logic_op): + SyncLogicOp(); + break; + + case PICA_REG_INDEX(texturing.main_config): + shader_dirty = true; + break; + + // Texture 0 type + case PICA_REG_INDEX(texturing.texture0.type): + shader_dirty = true; + break; + + // TEV stages + // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) + case PICA_REG_INDEX(texturing.tev_stage0.color_source1): + case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage0.color_op): + case PICA_REG_INDEX(texturing.tev_stage0.color_scale): + case PICA_REG_INDEX(texturing.tev_stage1.color_source1): + case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage1.color_op): + case PICA_REG_INDEX(texturing.tev_stage1.color_scale): + case PICA_REG_INDEX(texturing.tev_stage2.color_source1): + case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage2.color_op): + case PICA_REG_INDEX(texturing.tev_stage2.color_scale): + case PICA_REG_INDEX(texturing.tev_stage3.color_source1): + case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage3.color_op): + case PICA_REG_INDEX(texturing.tev_stage3.color_scale): + case PICA_REG_INDEX(texturing.tev_stage4.color_source1): + case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage4.color_op): + case PICA_REG_INDEX(texturing.tev_stage4.color_scale): + case PICA_REG_INDEX(texturing.tev_stage5.color_source1): + case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1): + case PICA_REG_INDEX(texturing.tev_stage5.color_op): + case PICA_REG_INDEX(texturing.tev_stage5.color_scale): + case PICA_REG_INDEX(texturing.tev_combiner_buffer_input): + shader_dirty = true; + break; + case PICA_REG_INDEX(texturing.tev_stage0.const_r): + SyncTevConstColor(0, regs.texturing.tev_stage0); + break; + case PICA_REG_INDEX(texturing.tev_stage1.const_r): + SyncTevConstColor(1, regs.texturing.tev_stage1); + break; + case PICA_REG_INDEX(texturing.tev_stage2.const_r): + SyncTevConstColor(2, regs.texturing.tev_stage2); + break; + case PICA_REG_INDEX(texturing.tev_stage3.const_r): + SyncTevConstColor(3, regs.texturing.tev_stage3); + break; + case PICA_REG_INDEX(texturing.tev_stage4.const_r): + SyncTevConstColor(4, regs.texturing.tev_stage4); + break; + case PICA_REG_INDEX(texturing.tev_stage5.const_r): + SyncTevConstColor(5, regs.texturing.tev_stage5); + break; + + // TEV combiner buffer color + case PICA_REG_INDEX(texturing.tev_combiner_buffer_color): + SyncCombinerColor(); + break; + + // Fragment lighting switches + case PICA_REG_INDEX(lighting.disable): + case PICA_REG_INDEX(lighting.max_light_index): + case PICA_REG_INDEX(lighting.config0): + case PICA_REG_INDEX(lighting.config1): + case PICA_REG_INDEX(lighting.abs_lut_input): + case PICA_REG_INDEX(lighting.lut_input): + case PICA_REG_INDEX(lighting.lut_scale): + case 
PICA_REG_INDEX(lighting.light_enable): + break; + + // Fragment lighting specular 0 color + case PICA_REG_INDEX(lighting.light[0].specular_0): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX(lighting.light[1].specular_0): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX(lighting.light[2].specular_0): + SyncLightSpecular0(2); + break; + case PICA_REG_INDEX(lighting.light[3].specular_0): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX(lighting.light[4].specular_0): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX(lighting.light[5].specular_0): + SyncLightSpecular0(5); + break; + case PICA_REG_INDEX(lighting.light[6].specular_0): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX(lighting.light[7].specular_0): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX(lighting.light[0].specular_1): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX(lighting.light[1].specular_1): + SyncLightSpecular1(1); + break; + case PICA_REG_INDEX(lighting.light[2].specular_1): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX(lighting.light[3].specular_1): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX(lighting.light[4].specular_1): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX(lighting.light[5].specular_1): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX(lighting.light[6].specular_1): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX(lighting.light[7].specular_1): + SyncLightSpecular1(7); + break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX(lighting.light[0].diffuse): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX(lighting.light[1].diffuse): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX(lighting.light[2].diffuse): + SyncLightDiffuse(2); + break; + case PICA_REG_INDEX(lighting.light[3].diffuse): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX(lighting.light[4].diffuse): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX(lighting.light[5].diffuse): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX(lighting.light[6].diffuse): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX(lighting.light[7].diffuse): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX(lighting.light[0].ambient): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX(lighting.light[1].ambient): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX(lighting.light[2].ambient): + SyncLightAmbient(2); + break; + case PICA_REG_INDEX(lighting.light[3].ambient): + SyncLightAmbient(3); + break; + case PICA_REG_INDEX(lighting.light[4].ambient): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX(lighting.light[5].ambient): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX(lighting.light[6].ambient): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX(lighting.light[7].ambient): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX(lighting.light[0].x): + case PICA_REG_INDEX(lighting.light[0].z): + SyncLightPosition(0); + break; + case PICA_REG_INDEX(lighting.light[1].x): + case PICA_REG_INDEX(lighting.light[1].z): + SyncLightPosition(1); + break; + case PICA_REG_INDEX(lighting.light[2].x): + case PICA_REG_INDEX(lighting.light[2].z): + SyncLightPosition(2); + break; + case PICA_REG_INDEX(lighting.light[3].x): + case PICA_REG_INDEX(lighting.light[3].z): + SyncLightPosition(3); + break; + case PICA_REG_INDEX(lighting.light[4].x): + case PICA_REG_INDEX(lighting.light[4].z): + SyncLightPosition(4); + break; + 
case PICA_REG_INDEX(lighting.light[5].x): + case PICA_REG_INDEX(lighting.light[5].z): + SyncLightPosition(5); + break; + case PICA_REG_INDEX(lighting.light[6].x): + case PICA_REG_INDEX(lighting.light[6].z): + SyncLightPosition(6); + break; + case PICA_REG_INDEX(lighting.light[7].x): + case PICA_REG_INDEX(lighting.light[7].z): + SyncLightPosition(7); + break; + + // Fragment spot lighting direction + case PICA_REG_INDEX(lighting.light[0].spot_x): + case PICA_REG_INDEX(lighting.light[0].spot_z): + SyncLightSpotDirection(0); + break; + case PICA_REG_INDEX(lighting.light[1].spot_x): + case PICA_REG_INDEX(lighting.light[1].spot_z): + SyncLightSpotDirection(1); + break; + case PICA_REG_INDEX(lighting.light[2].spot_x): + case PICA_REG_INDEX(lighting.light[2].spot_z): + SyncLightSpotDirection(2); + break; + case PICA_REG_INDEX(lighting.light[3].spot_x): + case PICA_REG_INDEX(lighting.light[3].spot_z): + SyncLightSpotDirection(3); + break; + case PICA_REG_INDEX(lighting.light[4].spot_x): + case PICA_REG_INDEX(lighting.light[4].spot_z): + SyncLightSpotDirection(4); + break; + case PICA_REG_INDEX(lighting.light[5].spot_x): + case PICA_REG_INDEX(lighting.light[5].spot_z): + SyncLightSpotDirection(5); + break; + case PICA_REG_INDEX(lighting.light[6].spot_x): + case PICA_REG_INDEX(lighting.light[6].spot_z): + SyncLightSpotDirection(6); + break; + case PICA_REG_INDEX(lighting.light[7].spot_x): + case PICA_REG_INDEX(lighting.light[7].spot_z): + SyncLightSpotDirection(7); + break; + + // Fragment lighting light source config + case PICA_REG_INDEX(lighting.light[0].config): + case PICA_REG_INDEX(lighting.light[1].config): + case PICA_REG_INDEX(lighting.light[2].config): + case PICA_REG_INDEX(lighting.light[3].config): + case PICA_REG_INDEX(lighting.light[4].config): + case PICA_REG_INDEX(lighting.light[5].config): + case PICA_REG_INDEX(lighting.light[6].config): + case PICA_REG_INDEX(lighting.light[7].config): + shader_dirty = true; + break; + + // Fragment lighting distance attenuation bias + case PICA_REG_INDEX(lighting.light[0].dist_atten_bias): + SyncLightDistanceAttenuationBias(0); + break; + case PICA_REG_INDEX(lighting.light[1].dist_atten_bias): + SyncLightDistanceAttenuationBias(1); + break; + case PICA_REG_INDEX(lighting.light[2].dist_atten_bias): + SyncLightDistanceAttenuationBias(2); + break; + case PICA_REG_INDEX(lighting.light[3].dist_atten_bias): + SyncLightDistanceAttenuationBias(3); + break; + case PICA_REG_INDEX(lighting.light[4].dist_atten_bias): + SyncLightDistanceAttenuationBias(4); + break; + case PICA_REG_INDEX(lighting.light[5].dist_atten_bias): + SyncLightDistanceAttenuationBias(5); + break; + case PICA_REG_INDEX(lighting.light[6].dist_atten_bias): + SyncLightDistanceAttenuationBias(6); + break; + case PICA_REG_INDEX(lighting.light[7].dist_atten_bias): + SyncLightDistanceAttenuationBias(7); + break; + + // Fragment lighting distance attenuation scale + case PICA_REG_INDEX(lighting.light[0].dist_atten_scale): + SyncLightDistanceAttenuationScale(0); + break; + case PICA_REG_INDEX(lighting.light[1].dist_atten_scale): + SyncLightDistanceAttenuationScale(1); + break; + case PICA_REG_INDEX(lighting.light[2].dist_atten_scale): + SyncLightDistanceAttenuationScale(2); + break; + case PICA_REG_INDEX(lighting.light[3].dist_atten_scale): + SyncLightDistanceAttenuationScale(3); + break; + case PICA_REG_INDEX(lighting.light[4].dist_atten_scale): + SyncLightDistanceAttenuationScale(4); + break; + case PICA_REG_INDEX(lighting.light[5].dist_atten_scale): + SyncLightDistanceAttenuationScale(5); + 
break; + case PICA_REG_INDEX(lighting.light[6].dist_atten_scale): + SyncLightDistanceAttenuationScale(6); + break; + case PICA_REG_INDEX(lighting.light[7].dist_atten_scale): + SyncLightDistanceAttenuationScale(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case PICA_REG_INDEX(lighting.global_ambient): + SyncGlobalAmbient(); + break; + + // Fragment lighting lookup tables + case PICA_REG_INDEX(lighting.lut_data[0]): + case PICA_REG_INDEX(lighting.lut_data[1]): + case PICA_REG_INDEX(lighting.lut_data[2]): + case PICA_REG_INDEX(lighting.lut_data[3]): + case PICA_REG_INDEX(lighting.lut_data[4]): + case PICA_REG_INDEX(lighting.lut_data[5]): + case PICA_REG_INDEX(lighting.lut_data[6]): + case PICA_REG_INDEX(lighting.lut_data[7]): { + const auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lighting_lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty_any = true; + break; + } + } +} + +void RasterizerVulkan::FlushAll() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushAll(); +} + +void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + MICROPROFILE_SCOPE(OpenGL_Blits); + + VideoCore::SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.width = config.output_width; + src_params.stride = config.input_width; + src_params.height = config.output_height; + src_params.is_tiled = !config.input_linear; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.input_format); + src_params.UpdateParams(); + + VideoCore::SurfaceParams dst_params; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 + : config.output_width.Value(); + dst_params.height = config.scaling == config.ScaleXY ? 
config.output_height.Value() / 2 + : config.output_height.Value(); + dst_params.is_tiled = config.input_linear != config.dont_swizzle; + dst_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.UpdateParams(); + + auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + if (src_surface == nullptr) + return false; + + dst_params.res_scale = src_surface->res_scale; + + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, false); + if (dst_surface == nullptr) { + return false; + } + + if (src_surface->is_tiled != dst_surface->is_tiled) + std::swap(src_rect.top, src_rect.bottom); + + if (config.flip_vertically) + std::swap(src_rect.top, src_rect.bottom); + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) + return false; + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} + +bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + if (copy_size == 0) { + return false; + } + + u32 input_gap = config.texture_copy.input_gap * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (input_width == 0 && input_gap != 0) { + return false; + } + if (input_gap == 0 || input_width >= copy_size) { + input_width = copy_size; + input_gap = 0; + } + if (copy_size % input_width != 0) { + return false; + } + + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (output_width == 0 && output_gap != 0) { + return false; + } + if (output_gap == 0 || output_width >= copy_size) { + output_width = copy_size; + output_gap = 0; + } + if (copy_size % output_width != 0) { + return false; + } + + VideoCore::SurfaceParams src_params; + src_params.addr = config.GetPhysicalInputAddress(); + src_params.stride = input_width + input_gap; // stride in bytes + src_params.width = input_width; // width in bytes + src_params.height = copy_size / input_width; + src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; + src_params.end = src_params.addr + src_params.size; + + auto [src_surface, src_rect] = res_cache.GetTexCopySurface(src_params); + if (src_surface == nullptr) { + return false; + } + + if (output_gap != 0 && + (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * + (src_surface->is_tiled ? 8 : 1) || + output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { + return false; + } + + VideoCore::SurfaceParams dst_params = *src_surface; + dst_params.addr = config.GetPhysicalOutputAddress(); + dst_params.width = src_rect.GetWidth() / src_surface->res_scale; + dst_params.stride = dst_params.width + src_surface->PixelsInBytes( + src_surface->is_tiled ? 
output_gap / 8 : output_gap); + dst_params.height = src_rect.GetHeight() / src_surface->res_scale; + dst_params.res_scale = src_surface->res_scale; + dst_params.UpdateParams(); + + // Since we are going to invalidate the gap if there is one, we will have to load it first + const bool load_gap = output_gap != 0; + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap); + if (dst_surface == nullptr) { + return false; + } + + if (dst_surface->type == VideoCore::SurfaceType::Texture) { + return false; + } + + if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { + return false; + } + + res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); + return true; +} + +bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + auto dst_surface = res_cache.GetFillSurface(config); + if (dst_surface == nullptr) + return false; + + res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); + return true; +} + +bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + if (framebuffer_addr == 0) { + return false; + } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + + VideoCore::SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(config.width.Value(), pixel_stride); + src_params.height = config.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); + + const auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + + if (src_surface == nullptr) { + return false; + } + + u32 scaled_width = src_surface->GetScaledWidth(); + u32 scaled_height = src_surface->GetScaledHeight(); + + screen_info.display_texcoords = Common::Rectangle( + (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.display_texture = src_surface->alloc; + + return true; +} + +vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) { + auto properties = instance.GetPhysicalDevice().getProperties(); + const vk::SamplerCreateInfo sampler_info = { + .magFilter = PicaToVK::TextureFilterMode(info.mag_filter), + .minFilter = PicaToVK::TextureFilterMode(info.min_filter), + .mipmapMode = PicaToVK::TextureMipFilterMode(info.mip_filter), + .addressModeU = PicaToVK::WrapMode(info.wrap_s), + .addressModeV = PicaToVK::WrapMode(info.wrap_t), + .anisotropyEnable = true, + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false + }; + + vk::Device device = instance.GetDevice(); + return device.createSampler(sampler_info); +} + +vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info) { + u32 attachment_count = 0; + std::array attachments; + + if (info.color) { + attachments[attachment_count++] = info.color; + } + + if (info.depth) { + attachments[attachment_count++] = info.depth; + } + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = info.renderpass, + .attachmentCount = attachment_count, + .pAttachments = attachments.data(), + .width 
= info.width, + .height = info.height, + .layers = 1 + }; + + vk::Device device = instance.GetDevice(); + return device.createFramebuffer(framebuffer_info); +} + +void RasterizerVulkan::FlushBuffers() { + vertex_buffer.Flush(); + uniform_buffer.Flush(); + index_buffer.Flush(); + texture_buffer.Flush(); + texture_lf_buffer.Flush(); + pipeline_cache.MarkDescriptorSetsDirty(); +} + +void RasterizerVulkan::SetShader() { + pipeline_cache.UseFragmentShader(Pica::g_state.regs); +} + +void RasterizerVulkan::SyncClipEnabled() { + //state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; +} + +void RasterizerVulkan::SyncClipCoef() { + const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); + const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + if (new_clip_coef != uniform_block_data.data.clip_coef) { + uniform_block_data.data.clip_coef = new_clip_coef; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncCullMode() { + const auto& regs = Pica::g_state.regs; + pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); +} + +void RasterizerVulkan::SyncDepthScale() { + float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32(); + + if (depth_scale != uniform_block_data.data.depth_scale) { + uniform_block_data.data.depth_scale = depth_scale; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncDepthOffset() { + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32(); + + if (depth_offset != uniform_block_data.data.depth_offset) { + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncBlendEnabled() { + pipeline_info.blending.blend_enable.Assign(Pica::g_state.regs.framebuffer.output_merger.alphablend_enable); +} + +void RasterizerVulkan::SyncBlendFuncs() { + const auto& regs = Pica::g_state.regs; + + pipeline_info.blending.color_blend_eq.Assign(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + pipeline_info.blending.alpha_blend_eq.Assign(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + pipeline_info.blending.src_color_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + pipeline_info.blending.dst_color_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + pipeline_info.blending.src_alpha_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_source_a); + pipeline_info.blending.dst_alpha_blend_factor.Assign(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerVulkan::SyncBlendColor() { + /*auto blend_color = + PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); + state.blend.color.red = blend_color[0]; + state.blend.color.green = blend_color[1]; + state.blend.color.blue = blend_color[2]; + state.blend.color.alpha = blend_color[3];*/ +} + +void RasterizerVulkan::SyncFogColor() { + const auto& regs = Pica::g_state.regs; + uniform_block_data.data.fog_color = { + regs.texturing.fog_color.r.Value() / 255.0f, + regs.texturing.fog_color.g.Value() / 255.0f, + regs.texturing.fog_color.b.Value() / 255.0f, + }; + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncProcTexNoise() { + const auto& regs = Pica::g_state.regs.texturing; + 
uniform_block_data.data.proctex_noise_f = { + Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), + }; + uniform_block_data.data.proctex_noise_a = { + regs.proctex_noise_u.amplitude / 4095.0f, + regs.proctex_noise_v.amplitude / 4095.0f, + }; + uniform_block_data.data.proctex_noise_p = { + Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), + Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(), + }; + + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncProcTexBias() { + const auto& regs = Pica::g_state.regs.texturing; + uniform_block_data.data.proctex_bias = + Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8)) + .ToFloat32(); + + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncAlphaTest() { + const auto& regs = Pica::g_state.regs; + if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { + uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLogicOp() { + const auto& regs = Pica::g_state.regs; + pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op); +} + +void RasterizerVulkan::SyncColorWriteMask() { + const auto& regs = Pica::g_state.regs; + const u32 color_mask = (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF; + pipeline_info.blending.color_write_mask.Assign(color_mask); +} + +void RasterizerVulkan::SyncStencilWriteMask() { + const auto& regs = Pica::g_state.regs; + pipeline_info.depth_stencil.stencil_write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; +} + +void RasterizerVulkan::SyncDepthWriteMask() { + const auto& regs = Pica::g_state.regs; + pipeline_info.depth_stencil.depth_write_enable.Assign( + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable)); +} + +void RasterizerVulkan::SyncStencilTest() { + const auto& regs = Pica::g_state.regs; + + pipeline_info.depth_stencil.stencil_test_enable.Assign(regs.framebuffer.output_merger.stencil_test.enable && + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8); + pipeline_info.depth_stencil.stencil_fail_op.Assign(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); + pipeline_info.depth_stencil.stencil_pass_op.Assign(regs.framebuffer.output_merger.stencil_test.action_depth_pass); + pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(regs.framebuffer.output_merger.stencil_test.action_depth_fail); + pipeline_info.depth_stencil.stencil_compare_op.Assign(regs.framebuffer.output_merger.stencil_test.func); + pipeline_info.depth_stencil.stencil_reference = regs.framebuffer.output_merger.stencil_test.reference_value; + pipeline_info.depth_stencil.stencil_write_mask = regs.framebuffer.output_merger.stencil_test.input_mask; +} + +void RasterizerVulkan::SyncDepthTest() { + const auto& regs = Pica::g_state.regs; + pipeline_info.depth_stencil.depth_test_enable.Assign(regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1); + pipeline_info.depth_stencil.depth_compare_op.Assign( + regs.framebuffer.output_merger.depth_test_enable == 1 + ? 
regs.framebuffer.output_merger.depth_test_func.Value() + : Pica::FramebufferRegs::CompareFunc::Always); +} + +void RasterizerVulkan::SyncCombinerColor() { + auto combiner_color = PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); + if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { + uniform_block_data.data.tev_combiner_buffer_color = combiner_color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index, + const Pica::TexturingRegs::TevStageConfig& tev_stage) { + const auto const_color = PicaToVK::ColorRGBA8(tev_stage.const_color); + + if (const_color == uniform_block_data.data.const_color[stage_index]) { + return; + } + + uniform_block_data.data.const_color[stage_index] = const_color; + uniform_block_data.dirty = true; +} + +void RasterizerVulkan::SyncGlobalAmbient() { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpecular0(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpecular1(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightDiffuse(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightAmbient(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightPosition(int light_index) { + const Common::Vec3f position = { + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() + }; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightSpotDirection(int light_index) { + const auto& light = Pica::g_state.regs.lighting.light[light_index]; + const auto spot_direction = Common::Vec3f{light.spot_x, light.spot_y, light.spot_z} / 2047.0f; + + if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { + uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; + uniform_block_data.dirty = true; + } +} + +void 
RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { + float dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) + .ToFloat32(); + + if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { + uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { + float dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) + .ToFloat32(); + + if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { + uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncShadowBias() { + const auto& shadow = Pica::g_state.regs.framebuffer.shadow; + float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); + float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); + + if (constant != uniform_block_data.data.shadow_bias_constant || + linear != uniform_block_data.data.shadow_bias_linear) { + uniform_block_data.data.shadow_bias_constant = constant; + uniform_block_data.data.shadow_bias_linear = linear; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncShadowTextureBias() { + int bias = Pica::g_state.regs.texturing.shadow.bias << 1; + if (bias != uniform_block_data.data.shadow_texture_bias) { + uniform_block_data.data.shadow_texture_bias = bias; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncAndUploadLUTsLF() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(Common::Vec2f) * 128; // fog + + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // Sync the lighting luts + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index] || invalidate) { + lighting_lut_data[index] = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.lighting_lut_dirty[index] = false; + } + } + uniform_block_data.lighting_lut_dirty_any = false; + } + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data || invalidate) { + fog_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + 
new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.fog_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.fog_lut_dirty = false; + } + + texture_lf_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::SyncAndUploadLUTs() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha + sizeof(Common::Vec4f) * 256 + // proctex + sizeof(Common::Vec4f) * 256; // proctex diff + + if (!uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto SyncProcTexValueLUT = [this, &buffer = buffer, &offset = offset, &invalidate = invalidate, &bytes_used]( + const std::array& lut, + std::array& lut_data, int& lut_offset) { + std::array new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data || invalidate) { + lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + }; + + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.proctex.color_table.begin(), + Pica::g_state.proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data || invalidate) { + proctex_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + std::array new_data; + + 
std::transform(Pica::g_state.proctex.color_diff_table.begin(), + Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data || invalidate) { + proctex_diff_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_diff_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_diff_lut_dirty = false; + } + + texture_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { + const bool sync_vs = accelerate_draw; + const bool sync_fs = uniform_block_data.dirty; + + if (!sync_vs && !sync_fs) { + return; + } + + u32 used_bytes = 0; + const u32 uniform_size = static_cast(uniform_size_aligned_vs + uniform_size_aligned_fs); + auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, + static_cast(uniform_buffer_alignment)); + + if (sync_vs) { + Pica::Shader::VSUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); + std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); + + pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); + used_bytes += static_cast(uniform_size_aligned_vs); + } + + if (sync_fs || invalidate) { + std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(Pica::Shader::UniformData)); + + pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes, + sizeof(uniform_block_data.data)); + uniform_block_data.dirty = false; + used_bytes += static_cast(uniform_size_aligned_fs); + } + + uniform_buffer.Commit(used_bytes); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 000000000..d8b4a4be9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,314 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
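For context on the uniform_size_aligned_vs / uniform_size_aligned_fs values consumed by UploadUniforms() above, the following is a minimal sketch of how such padded sizes are typically derived from the device's minimum uniform-buffer offset alignment; ComputeUniformSizes and UniformSizes are illustrative names and not part of this patch, while Common::AlignUp and the Pica::Shader uniform structs are the ones already used in this diff.

// Illustrative only: one plausible way to derive the aligned uniform block sizes.
#include <cstddef>

#include "common/alignment.h"
#include "video_core/shader/shader_uniforms.h"

namespace Vulkan {

struct UniformSizes {
    std::size_t aligned_vs;
    std::size_t aligned_fs;
};

// Pads each uniform block to the device's minUniformBufferOffsetAlignment so that the
// fragment block can be bound at (offset + aligned_vs) without violating the limit.
inline UniformSizes ComputeUniformSizes(std::size_t min_ubo_alignment) {
    return UniformSizes{
        .aligned_vs = Common::AlignUp(sizeof(Pica::Shader::VSUniformData), min_ubo_alignment),
        .aligned_fs = Common::AlignUp(sizeof(Pica::Shader::UniformData), min_ubo_alignment),
    };
}

} // namespace Vulkan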
+ +#pragma once + +#include "common/vector_math.h" +#include "core/hw/gpu.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/regs_lighting.h" +#include "video_core/regs_texturing.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_uniforms.h" + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +struct ScreenInfo; + +class Instance; +class TaskScheduler; +class RenderpassCache; + +struct SamplerInfo { + using TextureConfig = Pica::TexturingRegs::TextureConfig; + TextureConfig::TextureFilter mag_filter; + TextureConfig::TextureFilter min_filter; + TextureConfig::TextureFilter mip_filter; + TextureConfig::WrapMode wrap_s; + TextureConfig::WrapMode wrap_t; + u32 border_color = 0; + u32 lod_min = 0; + u32 lod_max = 0; + s32 lod_bias = 0; + + // TODO(wwylele): remove this once mipmap for cube is implemented + bool supress_mipmap_for_cube = false; + + auto operator<=>(const SamplerInfo&) const noexcept = default; +}; + +struct FramebufferInfo { + vk::ImageView color; + vk::ImageView depth; + vk::RenderPass renderpass; + u32 width = 1; + u32 height = 1; + + auto operator<=>(const FramebufferInfo&) const noexcept = default; +}; + +} + +namespace std { +template <> +struct hash<Vulkan::SamplerInfo> { + std::size_t operator()(const Vulkan::SamplerInfo& info) const noexcept { + return Common::ComputeHash64(&info, sizeof(Vulkan::SamplerInfo)); + } +}; + +template <> +struct hash<Vulkan::FramebufferInfo> { + std::size_t operator()(const Vulkan::FramebufferInfo& info) const noexcept { + return Common::ComputeHash64(&info, sizeof(Vulkan::FramebufferInfo)); + } +}; +} // namespace std + +namespace Vulkan { + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { + friend class RendererVulkan; +public: + explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, TaskScheduler& scheduler, + TextureRuntime& runtime, RenderpassCache& renderpass_cache); + ~RasterizerVulkan() override; + + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; + void DrawTriangles() override; + void NotifyPicaRegisterChanged(u32 id) override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info); + bool AccelerateDrawBatch(bool is_indexed) override; + + /// Syncs entire status to match PICA registers + void SyncEntireState() override; + + /// Flushes all rasterizer owned buffers + void FlushBuffers(); + +private: + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA register + void SyncClipCoef(); + + /// Sets the shader in accordance with the current PICA register
state + void SetShader(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the depth scale to match the PICA register + void SyncDepthScale(); + + /// Syncs the depth offset to match the PICA register + void SyncDepthOffset(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the PICA register + void SyncBlendFuncs(); + + /// Syncs the blend color to match the PICA register + void SyncBlendColor(); + + /// Syncs the fog states to match the PICA register + void SyncFogColor(); + + /// Sync the procedural texture noise configuration to match the PICA register + void SyncProcTexNoise(); + + /// Sync the procedural texture bias configuration to match the PICA register + void SyncProcTexBias(); + + /// Syncs the alpha test states to match the PICA register + void SyncAlphaTest(); + + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + + /// Syncs the color write mask to match the PICA register state + void SyncColorWriteMask(); + + /// Syncs the stencil write mask to match the PICA register state + void SyncStencilWriteMask(); + + /// Syncs the depth write mask to match the PICA register state + void SyncDepthWriteMask(); + + /// Syncs the stencil test states to match the PICA register + void SyncStencilTest(); + + /// Syncs the depth test states to match the PICA register + void SyncDepthTest(); + + /// Syncs the TEV combiner color buffer to match the PICA register + void SyncCombinerColor(); + + /// Syncs the TEV constant color to match the PICA register + void SyncTevConstColor(std::size_t tev_index, + const Pica::TexturingRegs::TevStageConfig& tev_stage); + + /// Syncs the lighting global ambient color to match the PICA register + void SyncGlobalAmbient(); + + /// Syncs the specified light's specular 0 color to match the PICA register + void SyncLightSpecular0(int light_index); + + /// Syncs the specified light's specular 1 color to match the PICA register + void SyncLightSpecular1(int light_index); + + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); + + /// Syncs the specified light's ambient color to match the PICA register + void SyncLightAmbient(int light_index); + + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); + + /// Syncs the specified spot light direction to match the PICA register + void SyncLightSpotDirection(int light_index); + + /// Syncs the specified light's distance attenuation bias to match the PICA register + void SyncLightDistanceAttenuationBias(int light_index); + + /// Syncs the specified light's distance attenuation scale to match the PICA register + void SyncLightDistanceAttenuationScale(int light_index); + + /// Syncs the shadow rendering bias to match the PICA register + void SyncShadowBias(); + + /// Syncs the shadow texture bias to match the PICA register + void SyncShadowTextureBias(); + + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + void SyncAndUploadLUTsLF(); + + /// Upload the uniform blocks to the uniform buffer object + void UploadUniforms(bool accelerate_draw); + + /// Generic draw function for DrawTriangles and AccelerateDrawBatch + bool Draw(bool accelerate, bool is_indexed); + + /// Internal implementation for AccelerateDrawBatch + bool AccelerateDrawBatchInternal(bool is_indexed); + + struct VertexArrayInfo { + u32
vs_input_index_min; + u32 vs_input_index_max; + u32 vs_input_size; + }; + + /// Retrieve the range and the size of the input vertex + VertexArrayInfo AnalyzeVertexArray(bool is_indexed); + + /// Setup vertex array for AccelerateDrawBatch + void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max); + + /// Setup vertex shader for AccelerateDrawBatch + bool SetupVertexShader(); + + /// Setup geometry shader for AccelerateDrawBatch + bool SetupGeometryShader(); + + /// Creates a new sampler object + vk::Sampler CreateSampler(const SamplerInfo& info); + + /// Creates a new Vulkan framebuffer object + vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info); + +private: + const Instance& instance; + TaskScheduler& scheduler; + TextureRuntime& runtime; + RenderpassCache& renderpass_cache; + RasterizerCache res_cache; + PipelineCache pipeline_cache; + bool shader_dirty = true; + + /// Structure that the hardware rendered vertices are composed of + struct HardwareVertex { + HardwareVertex() = default; + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion); + + constexpr static VertexLayout GetVertexLayout(); + + Common::Vec4f position; + Common::Vec4f color; + Common::Vec2f tex_coord0; + Common::Vec2f tex_coord1; + Common::Vec2f tex_coord2; + float tex_coord0_w; + Common::Vec4f normquat; + Common::Vec3f view; + }; + + std::vector vertex_batch; + ImageAlloc default_texture; + vk::Sampler default_sampler; + + struct { + Pica::Shader::UniformData data{}; + std::array lighting_lut_dirty{}; + bool lighting_lut_dirty_any = true; + bool fog_lut_dirty = true; + bool proctex_noise_lut_dirty = true; + bool proctex_color_map_dirty = true; + bool proctex_alpha_map_dirty = true; + bool proctex_lut_dirty = true; + bool proctex_diff_lut_dirty = true; + bool dirty = true; + } uniform_block_data = {}; + + std::array hw_enabled_attributes{}; + + std::array texture_samplers; + SamplerInfo texture_cube_sampler; + std::unordered_map samplers; + std::unordered_map framebuffers; + + StreamBuffer vertex_buffer; + StreamBuffer uniform_buffer; + StreamBuffer index_buffer; + StreamBuffer texture_buffer; + StreamBuffer texture_lf_buffer; + PipelineInfo pipeline_info; + std::size_t uniform_buffer_alignment; + std::size_t uniform_size_aligned_vs; + std::size_t uniform_size_aligned_fs; + + std::array, Pica::LightingRegs::NumLightingSampler> + lighting_lut_data{}; + std::array fog_lut_data{}; + std::array proctex_noise_lut_data{}; + std::array proctex_color_map_data{}; + std::array proctex_alpha_map_data{}; + std::array proctex_lut_data{}; + std::array proctex_diff_lut_data{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index caf9ac687..af82c3a30 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -6,6 +6,7 @@ #include "common/assert.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" namespace Vulkan { @@ -30,7 +31,8 @@ vk::Format ToVkFormatDepth(u32 index) { } } -RenderpassCache::RenderpassCache(const Instance& instance) : instance{instance} { +RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler) + : instance{instance}, scheduler{scheduler} { // Pre-create all needed renderpasses 
by the renderer for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { @@ -75,6 +77,26 @@ RenderpassCache::~RenderpassCache() { device.destroyRenderPass(present_renderpass); } +void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) { + if (renderpass_active) { + return; + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + renderpass_active = true; +} + +void RenderpassCache::ExitRenderpass() { + if (!renderpass_active) { + return; + } + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.endRenderPass(); + renderpass_active = false; +} + void RenderpassCache::CreatePresentRenderpass(vk::Format format) { if (!present_renderpass) { present_renderpass = CreateRenderPass(format, vk::Format::eUndefined, diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index 5a2cd26f3..47187aa62 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -10,18 +10,21 @@ namespace Vulkan { class Instance; -class Swapchain; +class TaskScheduler; constexpr u32 MAX_COLOR_FORMATS = 5; constexpr u32 MAX_DEPTH_FORMATS = 3; class RenderpassCache { public: - RenderpassCache(const Instance& instance); + RenderpassCache(const Instance& instance, TaskScheduler& scheduler); ~RenderpassCache(); - /// Creates the renderpass used when rendering to the swapchain - void CreatePresentRenderpass(vk::Format format); + /// Begins a new renderpass only when no other renderpass is currently active + void EnterRenderpass(const vk::RenderPassBeginInfo begin_info); + + /// Exits from any currently active renderpass instance + void ExitRenderpass(); /// Returns the renderpass associated with the color-depth format pair vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, @@ -32,6 +35,9 @@ public: return present_renderpass; } + /// Creates the renderpass used when rendering to the swapchain + void CreatePresentRenderpass(vk::Format format); + private: /// Creates a renderpass configured appropriately and stores it in cached_renderpasses vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, vk::AttachmentLoadOp load_op, @@ -39,8 +45,11 @@ private: private: const Instance& instance; + TaskScheduler& scheduler; + + bool renderpass_active = false; vk::RenderPass present_renderpass{}; vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS+1][MAX_DEPTH_FORMATS+1][2]; }; -} // namespace VideoCore::Vulkan +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 3c96ee08f..6940f3017 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1209,7 +1209,8 @@ float ProcTexNoiseCoef(vec2 x) { std::string GenerateFragmentShader(const PicaFSConfig& config) { const auto& state = config.state; - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; + std::string out = "#version 450 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += GetVertexInterfaceDeclaration(false); out += R"( @@ -1538,7 +1539,8 @@ do { } std::string GenerateTrivialVertexShader() { - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; + std::string out = "#version 
450 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += fmt::format("layout(location = {}) in vec4 vert_position;\n" "layout(location = {}) in vec4 vert_color;\n" @@ -1723,7 +1725,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { }; std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) { - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; + std::string out = "#version 450 core\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += R"( layout(triangles) in; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 60629ae33..8b610643a 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -68,13 +68,14 @@ StagingBuffer::~StagingBuffer() { vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); } -StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info) - : instance{instance}, scheduler{scheduler}, info{info}, - staging{instance, info.size, vk::BufferUsageFlagBits::eTransferSrc} { +StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, + u32 size, vk::BufferUsageFlagBits usage, std::span view_formats) + : instance{instance}, scheduler{scheduler}, staging{instance, size, vk::BufferUsageFlagBits::eTransferSrc}, + usage{usage}, total_size{size} { const vk::BufferCreateInfo buffer_info = { - .size = info.size, - .usage = info.usage | vk::BufferUsageFlagBits::eTransferDst + .size = total_size, + .usage = usage | vk::BufferUsageFlagBits::eTransferDst }; const VmaAllocationCreateInfo alloc_create_info = { @@ -91,37 +92,36 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, c buffer = vk::Buffer{unsafe_buffer}; - vk::Device device = instance.GetDevice(); - for (u32 i = 0; i < info.views.size(); i++) { - if (info.views[i] == vk::Format::eUndefined) { - view_count = i; - break; - } + ASSERT(view_formats.size() < MAX_BUFFER_VIEWS); + vk::Device device = instance.GetDevice(); + for (std::size_t i = 0; i < view_formats.size(); i++) { const vk::BufferViewCreateInfo view_info = { .buffer = buffer, - .format = info.views[i], - .range = info.size + .format = view_formats[i], + .offset = 0, + .range = total_size }; views[i] = device.createBufferView(view_info); } - available_size = info.size; + available_size = total_size; + view_count = view_formats.size(); } StreamBuffer::~StreamBuffer() { if (buffer) { vk::Device device = instance.GetDevice(); vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); - for (u32 i = 0; i < view_count; i++) { + for (std::size_t i = 0; i < view_count; i++) { device.destroyBufferView(views[i]); } } } std::tuple StreamBuffer::Map(u32 size, u32 alignment) { - ASSERT(size <= info.size && alignment <= info.size); + ASSERT(size <= total_size && alignment <= total_size); if (alignment > 0) { buffer_offset = Common::AlignUp(buffer_offset, alignment); @@ -134,7 +134,7 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { Flush(); // If we are at the end of the buffer, start over - if (buffer_offset + size > info.size) { + if (buffer_offset + size > total_size) { Invalidate(); invalidate = true; } @@ -145,7 +145,7 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { LOG_WARNING(Render_Vulkan, "Buffer GPU stall"); Invalidate(); regions.clear(); - available_size = info.size; + available_size = total_size; } } @@ 
-156,7 +156,7 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { void StreamBuffer::Commit(u32 size) { vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - auto [access_mask, stage_mask] = ToVkAccessStageFlags(info.usage); + auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); const vk::BufferMemoryBarrier buffer_barrier = { .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = access_mask, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index bfc4d5556..06f089186 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -15,12 +15,6 @@ class TaskScheduler; constexpr u32 MAX_BUFFER_VIEWS = 3; -struct BufferInfo { - u32 size = 0; - vk::BufferUsageFlagBits usage{}; - std::array views{}; -}; - struct LockedRegion { u32 size = 0; u64 fence_counter = 0; @@ -38,7 +32,8 @@ struct StagingBuffer { class StreamBuffer { public: - StreamBuffer(const Instance& instance, TaskScheduler& scheduler, const BufferInfo& info); + StreamBuffer(const Instance& instance, TaskScheduler& scheduler, + u32 size, vk::BufferUsageFlagBits usage, std::span views); ~StreamBuffer(); std::tuple Map(u32 size, u32 alignment = 0); @@ -54,6 +49,10 @@ public: return buffer; } + u32 GetBufferOffset() const { + return buffer_offset; + } + /// Returns an immutable reference to the requested buffer view const vk::BufferView& GetView(u32 index = 0) const { ASSERT(index < view_count); @@ -70,13 +69,14 @@ private: private: const Instance& instance; TaskScheduler& scheduler; - BufferInfo info{}; StagingBuffer staging; vk::Buffer buffer{}; VmaAllocation allocation{}; + vk::BufferUsageFlagBits usage; + u32 total_size = 0; std::array views{}; - u32 view_count = 0; + std::size_t view_count = 0; u32 buffer_offset = 0; u32 flush_start = 0; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index b872293ef..80c88609c 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -11,9 +11,8 @@ namespace Vulkan { -Swapchain::Swapchain(const Instance& instance, CommandScheduler& scheduler, - RenderpassCache& renderpass_cache, vk::SurfaceKHR surface) - : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, surface{surface} { +Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache) + : instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} { // Set the surface format early for RenderpassCache to create the present renderpass Configure(0, 0); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 7cd331639..e502ffc85 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -11,13 +11,12 @@ namespace Vulkan { class Instance; -class CommandScheduler; +class TaskScheduler; class RenderpassCache; class Swapchain { public: - Swapchain(const Instance& instance, CommandScheduler& scheduler, - RenderpassCache& renderpass_cache,vk::SurfaceKHR surface); + Swapchain(const Instance& instance, RenderpassCache& renderpass_cache); ~Swapchain(); /// Creates (or recreates) the swapchain with a given size. 
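As a usage note for the reworked StreamBuffer constructor above, here is a minimal sketch of creating a texel stream buffer with explicit view formats. It assumes the span element type elided in this diff is vk::Format; the buffer size, formats and function name are illustrative and not taken from the patch.

// Illustrative only: constructing a StreamBuffer with the new explicit parameters.
#include <array>

#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"

namespace Vulkan {

void CreateLutBufferExample(const Instance& instance, TaskScheduler& scheduler) {
    // Buffer views let shaders read the streamed data as texel buffers.
    std::array view_formats = {vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32A32Sfloat};
    StreamBuffer texture_buffer{instance, scheduler, 2 * 1024 * 1024,
                                vk::BufferUsageFlagBits::eUniformTexelBuffer, view_formats};

    // Typical per-frame usage: map a range, write into it, then commit the bytes used
    // (see the rasterizer's SyncAndUploadLUTs for the real pattern).
    auto [ptr, offset, invalidate] = texture_buffer.Map(64, 16);
    (void)ptr;
    (void)offset;
    (void)invalidate;
    texture_buffer.Commit(64);
}

} // namespace Vulkan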
@@ -39,6 +38,11 @@ public: return surface; } + /// Returns the current framebuffer + vk::Framebuffer GetFramebuffer() const { + return swapchain_images[current_image].framebuffer; + } + /// Returns the swapchain format vk::SurfaceFormatKHR GetSurfaceFormat() const { return surface_format; @@ -69,7 +73,6 @@ private: private: const Instance& instance; - CommandScheduler& scheduler; RenderpassCache& renderpass_cache; vk::SwapchainKHR swapchain{}; vk::SurfaceKHR surface{}; diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index d7c039189..066ba2f5b 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -24,6 +24,7 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} { vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024}, vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024}, vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, 512}, vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048}, vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024} }; diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index d2204233a..d3941a7a4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -5,6 +5,7 @@ #define VULKAN_HPP_NO_CONSTRUCTORS #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" @@ -54,10 +55,27 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { return vk::ImageAspectFlagBits::eColor; } +vk::FormatFeatureFlagBits ToVkFormatFeatures(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::FormatFeatureFlagBits::eColorAttachment; + case VideoCore::SurfaceType::Depth: + case VideoCore::SurfaceType::DepthStencil: + return vk::FormatFeatureFlagBits::eDepthStencilAttachment; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return vk::FormatFeatureFlagBits::eColorAttachment; +} + constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024; -TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler) - : instance{instance}, scheduler{scheduler} { +TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler, + RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache} { for (auto& buffer : staging_buffers) { buffer = std::make_unique(instance, STAGING_BUFFER_SIZE, @@ -66,6 +84,17 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule } } +TextureRuntime::~TextureRuntime() { + VmaAllocator allocator = instance.GetAllocator(); + vk::Device device = instance.GetDevice(); + for (auto& [key, alloc] : texture_recycler) { + vmaDestroyImage(allocator, alloc.image, alloc.allocation); + device.destroyImageView(alloc.image_view); + } + + texture_recycler.clear(); +} + StagingData TextureRuntime::FindStaging(u32 size, bool upload) { const u32 current_slot = scheduler.GetCurrentSlotIndex(); const
u32 offset = staging_offsets[current_slot]; @@ -77,6 +106,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) { const auto& buffer = staging_buffers[current_slot]; return StagingData{ .buffer = buffer->buffer, + .size = size, .mapped = buffer->mapped.subspan(offset, size), .buffer_offset = offset }; @@ -108,6 +138,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma vk::Format vk_format = instance.GetFormatAlternative(ToVkFormat(format)); vk::ImageAspectFlags aspect = GetImageAspect(vk_format); + const u32 levels = std::bit_width(std::max(width, height)); const vk::ImageCreateInfo image_info = { .flags = type == VideoCore::TextureType::CubeMap ? vk::ImageCreateFlagBits::eCubeCompatible : @@ -115,7 +146,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma .imageType = vk::ImageType::e2D, .format = vk_format, .extent = {width, height, 1}, - .mipLevels = std::bit_width(std::max(width, height)), + .mipLevels = levels, .arrayLayers = layers, .samples = vk::SampleCountFlagBits::e1, .usage = GetImageUsage(aspect), @@ -160,13 +191,26 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma .image = image, .image_view = image_view, .allocation = allocation, + .levels = levels }; } +void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest) { + const VideoCore::SurfaceType type = VideoCore::GetFormatType(format); + const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type); + if (!instance.IsFormatSupported(ToVkFormat(format), feature)) { + LOG_CRITICAL(Render_Vulkan, "Unimplemented format conversion!"); + UNREACHABLE(); + } +} + bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, VideoCore::ClearValue value) { + renderpass_cache.ExitRenderpass(); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
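// Note on the ExitRenderpass() call above: vkCmdClearColorImage, vkCmdClearDepthStencilImage
// and the transfer copy/blit commands used elsewhere in this runtime must be recorded outside
// of a render pass instance, so any render pass begun by the rasterizer is ended first.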
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); - dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); + Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); + Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); - command_buffer.copyImage(source.image, vk::ImageLayout::eTransferSrcOptimal, - dest.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy); return true; } bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { + renderpass_cache.ExitRenderpass(); + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - source.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); - dest.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); + Transition(command_buffer, source.alloc, vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); + Transition(command_buffer, dest.alloc, vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); const std::array source_offsets = { vk::Offset3D{static_cast(blit.src_rect.left), static_cast(blit.src_rect.bottom), 0}, @@ -271,14 +319,16 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCor .dstOffsets = dest_offsets }; - command_buffer.blitImage(source.image, vk::ImageLayout::eTransferSrcOptimal, - dest.image, vk::ImageLayout::eTransferDstOptimal, + command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eLinear); return true; } void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { + renderpass_cache.ExitRenderpass(); + // TODO: Investigate AMD single pass downsampler s32 current_width = surface.GetScaledWidth(); s32 current_height = surface.GetScaledHeight(); @@ -287,8 +337,8 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { vk::ImageAspectFlags aspect = ToVkAspect(surface.type); vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); for (u32 i = 1; i < levels; i++) { - surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); - surface.TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, i, 1); + Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); + Transition(command_buffer, surface.alloc, vk::ImageLayout::eTransferDstOptimal, i, 1); const std::array source_offsets = { vk::Offset3D{0, 0, 0}, @@ -318,166 +368,15 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { .dstOffsets = dest_offsets }; - command_buffer.blitImage(surface.image, vk::ImageLayout::eTransferSrcOptimal, - surface.image, vk::ImageLayout::eTransferDstOptimal, + command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal, + surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eLinear); } } -Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) - : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, - scheduler{runtime.GetScheduler()} { - const 
ImageAlloc alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), - params.pixel_format, texture_type); - - allocation = alloc.allocation; - image_view = alloc.image_view; - image = alloc.image; -} - -MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); -void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { - MICROPROFILE_SCOPE(Vulkan_Upload); - - const bool is_scaled = res_scale != 1; - if (is_scaled) { - ScaledUpload(upload); - } else { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - const VideoCore::Rect2D rect = upload.texture_rect; - const vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = { - .aspectMask = aspect, - .mipLevel = upload.texture_level, - .baseArrayLayer = 0, - .layerCount = 1 - }, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} - }; - - TransitionLevels(command_buffer, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); - command_buffer.copyBufferToImage(staging.buffer, image, - vk::ImageLayout::eTransferDstOptimal, - copy_region); - } - - InvalidateAllWatcher(); -} - -MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64)); -void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { - MICROPROFILE_SCOPE(Vulkan_Download); - - const bool is_scaled = res_scale != 1; - if (is_scaled) { - ScaledDownload(download); - } else { - u32 region_count = 0; - std::array copy_regions; - - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - const VideoCore::Rect2D rect = download.texture_rect; - vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = { - .aspectMask = aspect, - .mipLevel = download.texture_level, - .baseArrayLayer = 0, - .layerCount = 1 - }, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} - }; - - if (aspect & vk::ImageAspectFlagBits::eColor) { - copy_regions[region_count++] = copy_region; - } else if (aspect & vk::ImageAspectFlagBits::eDepth) { - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - copy_regions[region_count++] = copy_region; - - if (aspect & vk::ImageAspectFlagBits::eStencil) { - copy_region.bufferOffset += staging.mapped.size(); - copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil; - copy_regions[region_count++] = copy_region; - } - } - - TransitionLevels(command_buffer, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); - - // Copy pixel data to the staging buffer - command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, - staging.buffer, region_count, copy_regions.data()); - - scheduler.Submit(true); - } -} - -void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { - /*const u32 rect_width = download.texture_rect.GetWidth(); - const u32 rect_height = download.texture_rect.GetHeight(); - - // Allocate an unscaled texture that fits the download rectangle to use as a blit destination - const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, - VideoCore::TextureType::Texture2D); - 
runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); - runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture); - - // Blit the scaled rectangle to the unscaled texture - const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; - glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top, - 0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR); - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); - - const auto& tuple = runtime.GetFormatTuple(pixel_format); - if (driver.IsOpenGLES()) { - const auto& downloader_es = runtime.GetDownloaderES(); - downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect_height, rect_width, - reinterpret_cast(download.buffer_offset)); - } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - reinterpret_cast(download.buffer_offset)); - }*/ -} - -void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { - /*const u32 rect_width = upload.texture_rect.GetWidth(); - const u32 rect_height = upload.texture_rect.GetHeight(); - - OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, - VideoCore::TextureType::Texture2D); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); - - glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, - tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); - - const auto scaled_rect = upload.texture_rect * res_scale; - const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; - const auto& filterer = runtime.GetFilterer(); - if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) { - runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); - runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture); - - // If filtering fails, resort to normal blitting - glBlitFramebuffer(0, 0, rect_width, rect_height, - upload.texture_rect.left, upload.texture_rect.bottom, - upload.texture_rect.right, upload.texture_rect.top, - MakeBufferMask(type), GL_LINEAR); - }*/ -} - -void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, - u32 level, u32 level_count) { - if (new_layout == layout) { +void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, + vk::ImageLayout new_layout, u32 level, u32 level_count) { + if (new_layout == alloc.layout || !alloc.image) { return; } @@ -540,23 +439,194 @@ void Surface::TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout return info; }; - LayoutInfo source = GetLayoutInfo(layout); + LayoutInfo source = GetLayoutInfo(alloc.layout); LayoutInfo dest = GetLayoutInfo(new_layout); const vk::ImageMemoryBarrier barrier = { .srcAccessMask = source.access, .dstAccessMask = dest.access, - .oldLayout = layout, + .oldLayout = alloc.layout, .newLayout = new_layout, - .image = image, - .subresourceRange = {aspect, level, level_count, 0, 1} + .image = alloc.image, + .subresourceRange = { + .aspectMask = alloc.aspect, + .baseMipLevel = level, + .levelCount = level_count, + .baseArrayLayer = 0, + .layerCount = 1 + } }; command_buffer.pipelineBarrier(source.stage, dest.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier); - layout = new_layout; + alloc.layout = new_layout; +} + +Surface::Surface(VideoCore::SurfaceParams& params, 
TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, + scheduler{runtime.GetScheduler()} { + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); +} + +Surface::~Surface() { + const VideoCore::HostTextureTag tag = { + .format = pixel_format, + .width = GetScaledWidth(), + .height = GetScaledHeight(), + .layers = texture_type == VideoCore::TextureType::CubeMap ? 6u : 1u + }; + + runtime.texture_recycler.emplace(tag, std::move(alloc)); +} + +MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64)); +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Upload); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledUpload(upload); + } else { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = upload.texture_rect; + const vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = { + .aspectMask = alloc.aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); + command_buffer.copyBufferToImage(staging.buffer, alloc.image, + vk::ImageLayout::eTransferDstOptimal, + copy_region); + } + + InvalidateAllWatcher(); + + // Lock this data until the next scheduler switch + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + runtime.staging_offsets[current_slot] += staging.size; +} + +MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64)); +void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { + MICROPROFILE_SCOPE(Vulkan_Download); + + const bool is_scaled = res_scale != 1; + if (is_scaled) { + ScaledDownload(download); + } else { + u32 region_count = 0; + std::array copy_regions; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const VideoCore::Rect2D rect = download.texture_rect; + vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = { + .aspectMask = alloc.aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1} + }; + + if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { + copy_regions[region_count++] = copy_region; + } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) { + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + copy_regions[region_count++] = copy_region; + + if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { + copy_region.bufferOffset += staging.mapped.size(); + copy_region.imageSubresource.aspectMask |= vk::ImageAspectFlagBits::eStencil; + copy_regions[region_count++] = copy_region; + } + } + + runtime.Transition(command_buffer, alloc, vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); + + // Copy pixel data to the staging buffer + 
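+    // (Submit(true) below appears to block until the copy has executed, so the caller can
+    //  safely read the mapped staging span back on the CPU)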
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, + staging.buffer, region_count, copy_regions.data()); + + scheduler.Submit(true); + } + + // Lock this data until the next scheduler switch + const u32 current_slot = scheduler.GetCurrentSlotIndex(); + runtime.staging_offsets[current_slot] += staging.size; +} + +void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { + /*const u32 rect_width = download.texture_rect.GetWidth(); + const u32 rect_height = download.texture_rect.GetHeight(); + + // Allocate an unscaled texture that fits the download rectangle to use as a blit destination + const ImageAlloc unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, + VideoCore::TextureType::Texture2D); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture); + + // Blit the scaled rectangle to the unscaled texture + const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; + glBlitFramebuffer(scaled_rect.left, scaled_rect.bottom, scaled_rect.right, scaled_rect.top, + 0, 0, rect_width, rect_height, MakeBufferMask(type), GL_LINEAR); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + const auto& tuple = runtime.GetFormatTuple(pixel_format); + if (driver.IsOpenGLES()) { + const auto& downloader_es = runtime.GetDownloaderES(); + downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect_height, rect_width, + reinterpret_cast(download.buffer_offset)); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + reinterpret_cast(download.buffer_offset)); + }*/ +} + +void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload) { + /*const u32 rect_width = upload.texture_rect.GetWidth(); + const u32 rect_height = upload.texture_rect.GetHeight(); + + OGLTexture unscaled_tex = runtime.Allocate(rect_width, rect_height, pixel_format, + VideoCore::TextureType::Texture2D); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, unscaled_tex.handle); + + glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, 0, 0, rect_width, rect_height, + tuple.format, tuple.type, reinterpret_cast(upload.buffer_offset)); + + const auto scaled_rect = upload.texture_rect * res_scale; + const auto unscaled_rect = VideoCore::Rect2D{0, rect_height, rect_width, 0}; + const auto& filterer = runtime.GetFilterer(); + if (!filterer.Filter(unscaled_tex, unscaled_rect, texture, scaled_rect, type)) { + runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_tex); + runtime.BindFramebuffer(GL_DRAW_FRAMEBUFFER, upload.texture_level, GL_TEXTURE_2D, type, texture); + + // If filtering fails, resort to normal blitting + glBlitFramebuffer(0, 0, rect_width, rect_height, + upload.texture_rect.left, upload.texture_rect.bottom, + upload.texture_rect.right, upload.texture_rect.top, + MakeBufferMask(type), GL_LINEAR); + }*/ } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 79b62949b..f308a7631 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -15,6 +15,7 @@ namespace Vulkan { struct StagingData { vk::Buffer buffer; + u32 size = 0; std::span mapped{}; u32 buffer_offset = 0; }; @@ -23,9 +24,13 @@ struct ImageAlloc { vk::Image image; vk::ImageView image_view; 
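+    // The layout, aspect and mip level count added below are cached per allocation so that
+    // TextureRuntime::Transition can track the current layout and skip redundant barriers.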
VmaAllocation allocation; + vk::ImageLayout layout = vk::ImageLayout::eUndefined; + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; + u32 levels = 1; }; class Instance; +class RenderpassCache; class Surface; /** @@ -35,12 +40,25 @@ class Surface; class TextureRuntime { friend class Surface; public: - TextureRuntime(const Instance& instance, TaskScheduler& scheduler); - ~TextureRuntime() = default; + TextureRuntime(const Instance& instance, TaskScheduler& scheduler, + RenderpassCache& renderpass_cache); + ~TextureRuntime(); /// Maps an internal staging buffer of the provided size of pixel uploads/downloads StagingData FindStaging(u32 size, bool upload); + /// Allocates a Vulkan image, possibly reusing an existing one + ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + VideoCore::TextureType type); + + /// Performs required format conversions on the staging data + void FormatConvert(VideoCore::PixelFormat format, bool upload, + std::span source, std::span dest); + + /// Transitions the mip level range of the given image allocation to new_layout + void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, + vk::ImageLayout new_layout, u32 level, u32 level_count); + /// Performs operations that need to be done on every scheduler slot switch void OnSlotSwitch(u32 new_slot); @@ -58,10 +76,6 @@ public: void GenerateMipmaps(Surface& surface, u32 max_level); private: - /// Allocates a vulkan image possibly resusing an existing one - ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, - VideoCore::TextureType type); - /// Returns the current Vulkan instance const Instance& GetInstance() const { return instance; @@ -75,6 +89,7 @@ private: private: const Instance& instance; TaskScheduler& scheduler; + RenderpassCache& renderpass_cache; std::array, SCHEDULER_COMMAND_COUNT> staging_buffers; std::array staging_offsets{}; std::unordered_map texture_recycler; @@ -82,9 +97,10 @@ private: class Surface : public VideoCore::SurfaceBase { friend class TextureRuntime; + friend class RasterizerVulkan; public: Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); - ~Surface() override = default; + ~Surface() override; /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); @@ -102,21 +118,13 @@ private: /// Overrides the image layout of the mip level range void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); - /// Transitions the mip level range of the surface to new_layout - void TransitionLevels(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, - u32 level, u32 level_count); - private: TextureRuntime& runtime; const Instance& instance; TaskScheduler& scheduler; - vk::Image image{}; - vk::ImageView image_view{}; - VmaAllocation allocation = nullptr; + ImageAlloc alloc{}; vk::Format internal_format = vk::Format::eUndefined; - vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone; - vk::ImageLayout layout = vk::ImageLayout::eUndefined; }; struct Traits { diff --git a/src/video_core/shader/shader_cache.h b/src/video_core/shader/shader_cache.h index ba3f703f8..4cf03da3c 100644 --- a/src/video_core/shader/shader_cache.h +++ b/src/video_core/shader/shader_cache.h @@ -40,7 +40,7 @@ public: shaders.emplace(key, std::move(shader)); } -private: +public: std::unordered_map shaders; }; @@ -89,7 +89,7 @@ public: shader_map.insert_or_assign(key, &cached_shader); } -private: +public: std::unordered_map
shader_map; std::unordered_map shader_cache; }; diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp new file mode 100644 index 000000000..ec8336ca4 --- /dev/null +++ b/src/video_core/shader/shader_uniforms.cpp @@ -0,0 +1,25 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_uniforms.h" + +namespace Pica::Shader { + +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup) { + std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), + [](bool value) -> BoolAligned { return {value ? 1 : 0}; }); + std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), + [](const auto& value) -> Common::Vec4u { + return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; + }); + std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), + [](const auto& value) -> Common::Vec4f { + return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), + value.w.ToFloat32()}; + }); +} + +} // namespace Pica::Shader diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/shader_uniforms.h new file mode 100644 index 000000000..db8179985 --- /dev/null +++ b/src/video_core/shader/shader_uniforms.h @@ -0,0 +1,98 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/vector_math.h" +#include "video_core/regs_lighting.h" + +namespace Pica { +struct ShaderRegs; +} + +namespace Pica::Shader { + +class ShaderSetup; + +enum class UniformBindings : u32 { Common, VS, GS }; + +struct LightSrc { + alignas(16) Common::Vec3f specular_0; + alignas(16) Common::Vec3f specular_1; + alignas(16) Common::Vec3f diffuse; + alignas(16) Common::Vec3f ambient; + alignas(16) Common::Vec3f position; + alignas(16) Common::Vec3f spot_direction; // negated + float dist_atten_bias; + float dist_atten_scale; +}; + +/** + * Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + * NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at + * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + * Not following that rule will cause problems on some AMD drivers.
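+ * The same 16-byte alignment rules apply under Vulkan, where the uniform block is
+ * declared with std140 layout as well.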
+ */ +struct UniformData { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4]; + alignas(16) Common::Vec3f fog_color; + alignas(8) Common::Vec2f proctex_noise_f; + alignas(8) Common::Vec2f proctex_noise_a; + alignas(8) Common::Vec2f proctex_noise_p; + alignas(16) Common::Vec3f lighting_global_ambient; + LightSrc light_src[8]; + alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) Common::Vec4f tev_combiner_buffer_color; + alignas(16) Common::Vec4f clip_coef; +}; + +static_assert(sizeof(UniformData) == 0x4F0, + "The size of the UniformData does not match the structure in the shader"); +static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + +/** + * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. + * NOTE: the same rule from UniformData also applies here. + */ +struct PicaUniformsData { + void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup); + + struct BoolAligned { + alignas(16) int b; + }; + + std::array bools; + alignas(16) std::array i; + alignas(16) std::array f; +}; + +struct VSUniformData { + PicaUniformsData uniforms; +}; +static_assert(sizeof(VSUniformData) == 1856, + "The size of the VSUniformData does not match the structure in the shader"); +static_assert(sizeof(VSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + + +} // namespace Pica::Shader diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 10f3ebc54..35e2d6ecd 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -11,6 +11,7 @@ #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/video_core.h" //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -43,15 +44,26 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory) g_memory = &memory; Pica::Init(); - OpenGL::GLES = Settings::values.graphics_api == Settings::GraphicsAPI::OpenGLES; + const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api; + switch (graphics_api) { + case Settings::GraphicsAPI::OpenGL: + case Settings::GraphicsAPI::OpenGLES: + OpenGL::GLES = Settings::values.graphics_api == Settings::GraphicsAPI::OpenGLES; + g_renderer = std::make_unique(emu_window); + break; + case Settings::GraphicsAPI::Vulkan: + g_renderer = std::make_unique(emu_window); + break; + default: + LOG_CRITICAL(Render, "Invalid graphics API enum value {}", graphics_api); + UNREACHABLE(); + } - g_renderer = std::make_unique(emu_window); ResultStatus result = g_renderer->Init(); - if (result != ResultStatus::Success) { - LOG_ERROR(Render, "initialization failed !"); + LOG_ERROR(Render, "Video core initialization failed"); } else { - LOG_DEBUG(Render, "initialized OK"); + LOG_INFO(Render, "Video 
core initialization OK"); } return result;