From 1cd0b04399bce209a6b80de63e84067d9b3faa47 Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Sun, 18 Sep 2022 20:05:18 +0300 Subject: [PATCH] renderer_vulkan: Fix some validation errors * Temporarily add glm until I figure out how to fix the alignment --- .gitmodules | 3 + CMakeLists.txt | 3 +- externals/CMakeLists.txt | 3 + externals/glm | 1 + src/CMakeLists.txt | 1 + src/citra_qt/CMakeLists.txt | 4 + src/citra_qt/bootmanager.cpp | 5 + src/common/CMakeLists.txt | 1 + src/common/file_util.cpp | 2 + src/common/logging/formatter.h | 2 +- .../arm/dyncom/arm_dyncom_interpreter.cpp | 2 +- src/core/memory.cpp | 48 +--- src/core/memory.h | 3 - src/video_core/CMakeLists.txt | 2 +- .../rasterizer_cache/rasterizer_cache.h | 10 +- src/video_core/renderer_opengl/gl_driver.cpp | 11 +- .../renderer_opengl/gl_texture_runtime.h | 4 +- src/video_core/renderer_vulkan/pica_to_vk.h | 2 +- .../renderer_vulkan/renderer_vulkan.cpp | 242 ++++++++++-------- .../renderer_vulkan/renderer_vulkan.h | 5 +- src/video_core/renderer_vulkan/vk_common.h | 6 +- .../renderer_vulkan/vk_instance.cpp | 19 +- src/video_core/renderer_vulkan/vk_instance.h | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 ++- src/video_core/renderer_vulkan/vk_platform.h | 10 +- .../renderer_vulkan/vk_rasterizer.cpp | 108 ++++---- .../renderer_vulkan/vk_renderpass_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_shader.cpp | 2 +- .../renderer_vulkan/vk_shader_gen.cpp | 2 +- .../renderer_vulkan/vk_stream_buffer.cpp | 32 +-- .../renderer_vulkan/vk_stream_buffer.h | 1 + .../renderer_vulkan/vk_swapchain.cpp | 2 + .../renderer_vulkan/vk_task_scheduler.cpp | 1 - .../renderer_vulkan/vk_texture_runtime.cpp | 18 +- .../renderer_vulkan/vk_texture_runtime.h | 7 +- src/video_core/texture/texture_decode.cpp | 4 +- src/web_service/verify_user_jwt.cpp | 2 +- 37 files changed, 326 insertions(+), 278 deletions(-) create mode 160000 externals/glm diff --git a/.gitmodules b/.gitmodules index 3e33bd5a1..17b412201 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "glslang"] path = externals/glslang url = https://github.com/KhronosGroup/glslang +[submodule "glm"] + path = externals/glm + url = https://github.com/g-truc/glm diff --git a/CMakeLists.txt b/CMakeLists.txt index a126718a2..1f168dd8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ cmake_policy(SET CMP0069 NEW) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") include(DownloadExternals) +include(GNUInstallDirs) include(CMakeDependentOption) project(citra LANGUAGES C CXX ASM) @@ -157,7 +158,7 @@ message(STATUS "Target architecture: ${ARCHITECTURE}") # Configure C++ standard # =========================== -set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) # set up output paths for executable binaries diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index df5c9dde4..c9c3588e5 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -60,6 +60,9 @@ add_subdirectory(glad) # glslang add_subdirectory(glslang) +# glm +add_subdirectory(glm) + # inih add_subdirectory(inih) diff --git a/externals/glm b/externals/glm new file mode 160000 index 000000000..cc98465e3 --- /dev/null +++ b/externals/glm @@ -0,0 +1 @@ +Subproject commit cc98465e3508535ba8c7f6208df934c156a018dc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e2d79f730..e11103d97 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -103,6 +103,7 @@ else() if (MINGW) add_definitions(-DMINGW_HAS_SECURE_API) + add_compile_options("-Wa,-mbig-obj") if (COMPILE_WITH_DWARF) add_compile_options("-gdwarf") endif() diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index b93ee9ccd..46117f9e9 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -263,6 +263,10 @@ target_link_libraries(citra-qt PRIVATE audio_core common core input_common netwo target_link_libraries(citra-qt PRIVATE Boost::boost glad nihstro-headers Qt5::Widgets Qt5::Multimedia) target_link_libraries(citra-qt PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) +if (NOT WIN32) + target_include_directories(citra-qt PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) +endif() + target_compile_definitions(citra-qt PRIVATE # Use QStringBuilder for string concatenation to reduce # the overall number of temporary strings created. diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index b95a9cd6e..b33bce00a 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -25,6 +25,10 @@ #include "video_core/renderer_base.h" #include "video_core/video_core.h" +#if !defined(WIN32) +#include +#endif + EmuThread::EmuThread(Frontend::GraphicsContext& core_context) : core_context(core_context) {} EmuThread::~EmuThread() = default; @@ -50,6 +54,7 @@ void EmuThread::run() { }); emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); + emit HideLoadingScreen(); core_context.MakeCurrent(); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2860a91e5..725c201b4 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -77,6 +77,7 @@ add_library(common STATIC logging/backend.h logging/filter.cpp logging/filter.h + logging/formatter.h logging/log.h logging/text_formatter.cpp logging/text_formatter.h diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp index 436d94406..1d7c2654d 100644 --- a/src/common/file_util.cpp +++ b/src/common/file_util.cpp @@ -31,8 +31,10 @@ #endif // 64 bit offsets for MSVC and MinGW. MinGW also needs this for using _wstat64 +#ifndef __MINGW64__ #define stat _stat64 #define fstat _fstat64 +#endif #else #ifdef __APPLE__ diff --git a/src/common/logging/formatter.h b/src/common/logging/formatter.h index ad6adb143..3d4d70902 100644 --- a/src/common/logging/formatter.h +++ b/src/common/logging/formatter.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include // adapted from https://github.com/fmtlib/fmt/issues/2704 // a generic formatter for enum classes diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 72e1ebe06..16b662372 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -972,7 +972,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // clunky switch statement. -#if defined __GNUC__ || defined __clang__ +#if defined __GNUC__ || (defined __clang__ && !defined _MSC_VER) #define GOTO_NEXT_INST \ GDB_BP_CHECK; \ if (num_instrs >= cpu->NumInstrsToExecute) \ diff --git a/src/core/memory.cpp b/src/core/memory.cpp index c9293bc55..f82b77976 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -514,33 +514,6 @@ bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const { return GetPhysicalRef(paddr); } -PAddr MemorySystem::ClampPhysicalAddress(PAddr base, PAddr address) const { - struct MemoryArea { - PAddr paddr_base; - u32 size; - }; - - constexpr std::array memory_areas = { - MemoryArea{VRAM_PADDR, VRAM_SIZE}, - MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE}, - MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE}, - MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE}, - }; - - const auto area = - std::ranges::find_if(memory_areas, [&](const MemoryArea& area) { - return base >= area.paddr_base && base <= area.paddr_base + area.size; - }); - - if (area == memory_areas.end()) { - LOG_ERROR(HW_Memory, "Unknown base address used for clamping {:#08X} at PC {:#08X}", base, - Core::GetRunningCore().GetPC()); - return address; - } - - return std::clamp(address, area->paddr_base, area->paddr_base + area->size); -} - u8* MemorySystem::GetPointer(const VAddr vaddr) { u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS]; if (page_pointer) { @@ -594,23 +567,18 @@ u8* MemorySystem::GetPhysicalPointer(PAddr address) { } MemoryRef MemorySystem::GetPhysicalRef(PAddr address) const { - struct MemoryArea { - PAddr paddr_base; - u32 size; - }; - constexpr std::array memory_areas = { - MemoryArea{VRAM_PADDR, VRAM_SIZE}, - MemoryArea{DSP_RAM_PADDR, DSP_RAM_SIZE}, - MemoryArea{FCRAM_PADDR, FCRAM_N3DS_SIZE}, - MemoryArea{N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE}, + std::make_pair(VRAM_PADDR, VRAM_SIZE), + std::make_pair(DSP_RAM_PADDR, DSP_RAM_SIZE), + std::make_pair(FCRAM_PADDR, FCRAM_N3DS_SIZE), + std::make_pair(N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE), }; const auto area = - std::ranges::find_if(memory_areas, [&](const MemoryArea& area) { + std::ranges::find_if(memory_areas, [&](const auto& area) { // Note: the region end check is inclusive because the user can pass in an address that // represents an open right bound - return address >= area.paddr_base && address <= area.paddr_base + area.size; + return address >= area.first && address <= area.first + area.second; }); if (area == memory_areas.end()) { @@ -619,10 +587,10 @@ MemoryRef MemorySystem::GetPhysicalRef(PAddr address) const { return nullptr; } - u32 offset_into_region = address - area->paddr_base; + u32 offset_into_region = address - area->first; std::shared_ptr target_mem = nullptr; - switch (area->paddr_base) { + switch (area->first) { case VRAM_PADDR: target_mem = impl->vram_mem; break; diff --git a/src/core/memory.h b/src/core/memory.h index 418066939..53d7e2f0e 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -565,9 +565,6 @@ public: /// Returns true if the address refers to a valid memory region bool IsValidPhysicalAddress(PAddr paddr) const; - /// Clamps the address to the boundaries of the memory region pointed by base - PAddr ClampPhysicalAddress(PAddr base, PAddr address) const; - /// Gets offset in FCRAM from a pointer inside FCRAM range u32 GetFCRAMOffset(const u8* pointer) const; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c0ecc33a2..35e7126b8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -189,7 +189,7 @@ create_target_directory_groups(video_core) target_include_directories(video_core PRIVATE ../../externals/vulkan-headers/include) target_include_directories(video_core PRIVATE ../../externals/vma) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad glslang SPIRV nihstro-headers Boost::serialization) +target_link_libraries(video_core PRIVATE glad glm::glm SPIRV glslang nihstro-headers Boost::serialization) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if (ARCHITECTURE_x86_64) diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 62fb394f8..26d50ed07 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -47,9 +47,9 @@ class RasterizerAccelerated; template class RasterizerCache : NonCopyable { public: - using TextureRuntime = typename T::Runtime; - using Surface = std::shared_ptr; - using Watcher = SurfaceWatcher; + using TextureRuntime = typename T::RuntimeType; + using Surface = std::shared_ptr; + using Watcher = SurfaceWatcher; private: /// Declare rasterizer interval types @@ -755,7 +755,7 @@ auto RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& confi params.type = SurfaceType::Fill; params.res_scale = std::numeric_limits::max(); - Surface new_surface = std::make_shared(params, runtime); + Surface new_surface = std::make_shared(params, runtime); std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); if (config.fill_32bit) { @@ -1211,7 +1211,7 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& r template auto RasterizerCache::CreateSurface(SurfaceParams& params) -> Surface { - Surface surface = std::make_shared(params, runtime); + Surface surface = std::make_shared(params, runtime); surface->invalid_regions.insert(surface->GetInterval()); return surface; diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 9dd6cbb44..83dd57718 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -112,14 +112,15 @@ void Driver::ReportDriverInfo() { } void Driver::DeduceVendor() { - if (gpu_vendor.contains("NVIDIA")) { + if (gpu_vendor.find("NVIDIA") != gpu_vendor.npos) { vendor = Vendor::Nvidia; - } else if (gpu_vendor.contains("ATI") || - gpu_vendor.contains("Advanced Micro Devices")) { + } else if ((gpu_vendor.find("ATI") != gpu_vendor.npos) || + (gpu_vendor.find("AMD") != gpu_vendor.npos) || + (gpu_vendor.find("Advanced Micro Devices") != gpu_vendor.npos)) { vendor = Vendor::AMD; - } else if (gpu_vendor.contains("Intel")) { + } else if (gpu_vendor.find("Intel") != gpu_vendor.npos) { vendor = Vendor::Intel; - } else if (gpu_vendor.contains("GDI Generic")) { + } else if (gpu_vendor.find("GDI Generic") != gpu_vendor.npos) { vendor = Vendor::Generic; } } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index d18450cb6..7843e495c 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -152,8 +152,8 @@ public: }; struct Traits { - using Runtime = TextureRuntime; - using Surface = Surface; + using RuntimeType = TextureRuntime; + using SurfaceType = Surface; }; using RasterizerCache = VideoCore::RasterizerCache; diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h index 6cbf3ecfa..3c9dc654f 100644 --- a/src/video_core/renderer_vulkan/pica_to_vk.h +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -250,7 +250,7 @@ inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { return vk::CullModeFlagBits::eNone; case Pica::RasterizerRegs::CullMode::KeepClockWise: case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: - return vk::CullModeFlagBits::eBack; + return vk::CullModeFlagBits::eNone; } } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 3d833f58a..0c26b07df 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #define VULKAN_HPP_NO_CONSTRUCTORS +#define GLM_FORCE_DEPTH_ZERO_TO_ONE +#include #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -32,8 +34,8 @@ layout (location = 0) out vec2 frag_tex_coord; // The third column performs translation. // The third row could be used for projection, which we don't need in 2D. It hence is assumed to // implicitly be [0, 0, 1] -layout (push_constant) uniform DrawInfo { - mat3x2 modelview_matrix; +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; vec4 i_resolution; vec4 o_resolution; int screen_id_l; @@ -42,39 +44,38 @@ layout (push_constant) uniform DrawInfo { }; void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); - gl_Position.y = -gl_Position.y; + vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix; + gl_Position = vec4(position.x, -position.y, 0.0, 1.0); frag_tex_coord = vert_tex_coord; } )"; constexpr std::string_view fragment_shader = R"( -version 450 core +#version 450 core #extension GL_ARB_separate_shader_objects : enable layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; -layout (push_constant) uniform DrawInfo { - mat3x2 modelview_matrix; +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; vec4 i_resolution; vec4 o_resolution; int screen_id_l; int screen_id_r; int layer; + int reverse_interlaced; }; -layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform sampler screen_sampler; void main() { - color = texture(screen_textures[screen_id_l], frag_tex_coord); + color = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); } )"; constexpr std::string_view fragment_shader_anaglyph = R"( -version 450 core +#version 450 core #extension GL_ARB_separate_shader_objects : enable layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; @@ -91,32 +92,8 @@ const mat3 r = mat3(-0.011,-0.032,-0.007, 0.377, 0.761, 0.009, -0.026,-0.093, 1.234); -layout (push_constant) uniform DrawInfo { - mat3x2 modelview_matrix; - vec4 i_resolution; - vec4 o_resolution; - int screen_id_l; - int screen_id_r; - int layer; -}; - -layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; - -void main() { - vec4 color_tex_l = texture(screen_textures[screen_id_l], frag_tex_coord); - vec4 color_tex_r = texture(screen_textures[screen_id_r], frag_tex_coord); - color = vec4(color_tex_l.rgb*l+color_tex_r.rgb*r, color_tex_l.a); -} -)"; - -constexpr std::string_view fragment_shader_interlaced = R"( -version 450 core -#extension GL_ARB_separate_shader_objects : enable -layout (location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; - -layout (push_constant) uniform DrawInfo { - mat3x2 modelview_matrix; +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; vec4 i_resolution; vec4 o_resolution; int screen_id_l; @@ -125,14 +102,41 @@ layout (push_constant) uniform DrawInfo { int reverse_interlaced; }; -layout (set = 0, binding = 0) uniform sampler2D screen_textures[3]; +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform sampler screen_sampler; + +void main() { + vec4 color_tex_l = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); + vec4 color_tex_r = texture(sampler2D(screen_textures[screen_id_r], screen_sampler), frag_tex_coord); + color = vec4(color_tex_l.rgb*l+color_tex_r.rgb*r, color_tex_l.a); +} +)"; + +constexpr std::string_view fragment_shader_interlaced = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (push_constant, std140) uniform DrawInfo { + mat4 modelview_matrix; + vec4 i_resolution; + vec4 o_resolution; + int screen_id_l; + int screen_id_r; + int layer; + int reverse_interlaced; +}; + +layout (set = 0, binding = 0) uniform texture2D screen_textures[3]; +layout (set = 0, binding = 1) uniform sampler screen_sampler; void main() { float screen_row = o_resolution.x * frag_tex_coord.x; if (int(screen_row) % 2 == reverse_interlaced) - color = texture(screen_textures[screen_id_l], frag_tex_coord); + color = texture(sampler2D(screen_textures[screen_id_l], screen_sampler), frag_tex_coord); else - color = texture(screen_textures[screen_id_r], frag_tex_coord); + color = texture(sampler2D(screen_textures[screen_id_r], screen_sampler), frag_tex_coord); } )"; @@ -194,15 +198,16 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window) RendererVulkan::~RendererVulkan() { vk::Device device = instance.GetDevice(); - device.destroyPipelineLayout(present_pipeline_layout); + device.destroyShaderModule(present_vertex_shader); device.destroyDescriptorSetLayout(present_descriptor_layout); device.destroyDescriptorUpdateTemplate(present_update_template); - device.destroyShaderModule(present_vertex_shader); + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { device.destroyPipeline(present_pipelines[i]); device.destroyShaderModule(present_shaders[i]); } + for (std::size_t i = 0; i < present_samplers.size(); i++) { device.destroySampler(present_samplers[i]); } @@ -268,27 +273,18 @@ void RendererVulkan::BeginRendering() { vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[current_pipeline]); + std::array present_textures; for (std::size_t i = 0; i < screen_infos.size(); i++) { - runtime.Transition(command_buffer, screen_infos[i].display_texture, - vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + const auto& info = screen_infos[i]; + present_textures[i] = vk::DescriptorImageInfo{ + .imageView = info.display_texture ? info.display_texture->image_view + : info.texture.alloc.image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal + }; } - const std::array present_textures = { - vk::DescriptorImageInfo{ - .sampler = present_samplers[current_sampler], - .imageView = screen_infos[0].display_texture.image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal - }, - vk::DescriptorImageInfo{ - .sampler = present_samplers[current_sampler], - .imageView = screen_infos[1].display_texture.image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal - }, - vk::DescriptorImageInfo{ - .sampler = present_samplers[current_sampler], - .imageView = screen_infos[2].display_texture.image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal - }, + present_textures[3] = vk::DescriptorImageInfo{ + .sampler = present_samplers[current_sampler] }; const vk::DescriptorSetAllocateInfo alloc_info = { @@ -299,7 +295,7 @@ void RendererVulkan::BeginRendering() { vk::Device device = instance.GetDevice(); vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; - device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures.data()); + device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures[0]); command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, 1, &set, 0, nullptr); @@ -398,35 +394,55 @@ void RendererVulkan::CompileShaders() { } void RendererVulkan::BuildLayouts() { - const vk::DescriptorSetLayoutBinding present_layout_binding = { - .binding = 0, - .descriptorType = vk::DescriptorType::eCombinedImageSampler, - .descriptorCount = 3, - .stageFlags = vk::ShaderStageFlagBits::eFragment + const std::array present_layout_bindings = { + vk::DescriptorSetLayoutBinding{ + .binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 3, + .stageFlags = vk::ShaderStageFlagBits::eFragment + }, + vk::DescriptorSetLayoutBinding{ + .binding = 1, + .descriptorType = vk::DescriptorType::eSampler, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eFragment + } }; const vk::DescriptorSetLayoutCreateInfo present_layout_info = { - .bindingCount = 1, - .pBindings = &present_layout_binding - }; - - const vk::DescriptorUpdateTemplateEntry update_template_entry = { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 3, - .descriptorType = vk::DescriptorType::eCombinedImageSampler, - .offset = 0, - .stride = sizeof(vk::DescriptorImageInfo) - }; - - const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = 1, - .pDescriptorUpdateEntries = &update_template_entry, - .descriptorSetLayout = present_descriptor_layout + .bindingCount = static_cast(present_layout_bindings.size()), + .pBindings = present_layout_bindings.data() }; vk::Device device = instance.GetDevice(); present_descriptor_layout = device.createDescriptorSetLayout(present_layout_info); + + const std::array update_template_entries = { + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 3, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo) + }, + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .offset = 3 * sizeof(vk::DescriptorImageInfo), + .stride = 0 + } + }; + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = present_descriptor_layout + }; + present_update_template = device.createDescriptorUpdateTemplate(template_info); const vk::PushConstantRange push_range = { @@ -639,8 +655,22 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, 0, sizeof(draw_info), &draw_info); + const vk::ClearValue clear_value = { + .color = clear_color + }; + + const vk::RenderPassBeginInfo begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = swapchain.GetFramebuffer(), + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + command_buffer.endRenderPass(); } void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) { @@ -675,8 +705,22 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, 0, sizeof(draw_info), &draw_info); + const vk::ClearValue clear_value = { + .color = clear_color + }; + + const vk::RenderPassBeginInfo begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = swapchain.GetFramebuffer(), + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + command_buffer.endRenderPass(); } void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, @@ -778,8 +822,11 @@ void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool f const auto& bottom_screen = layout.bottom_screen; // Set projection matrix - draw_info.modelview = - MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height), flipped); + //draw_info.modelview = + // MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height), flipped); + draw_info.modelview = glm::transpose(glm::ortho(0.f, static_cast(layout.width), + static_cast(layout.height), 0.0f, + 0.f, 1.f)); const bool stereo_single_screen = Settings::values.render_3d == Settings::StereoRenderOption::Anaglyph || @@ -920,6 +967,8 @@ void RendererVulkan::SwapBuffers() { swapchain.Create(layout.width, layout.height, false); } + swapchain.AcquireNextImage(); + const vk::Viewport viewport = { .x = 0.0f, .y = 0.0f, @@ -934,21 +983,14 @@ void RendererVulkan::SwapBuffers() { .extent = {layout.width, layout.height} }; - const vk::ClearValue clear_value = { - .color = clear_color - }; - - const vk::RenderPassBeginInfo begin_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), - .framebuffer = swapchain.GetFramebuffer(), - .clearValueCount = 1, - .pClearValues = &clear_value, - }; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); command_buffer.setViewport(0, viewport); command_buffer.setScissor(0, scissor); - command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + + for (auto& info : screen_infos) { + auto alloc = info.display_texture ? info.display_texture : &info.texture.alloc; + runtime.Transition(command_buffer, *alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + } DrawScreens(layout, false); @@ -956,8 +998,8 @@ void RendererVulkan::SwapBuffers() { vertex_buffer.Flush(); rasterizer->FlushBuffers(); - command_buffer.endRenderPass(); scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore()); + swapchain.Present(); // Inform texture runtime about the switch runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex()); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index c0f3c128c..5923e232a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" #include "common/math_util.h" #include "core/hw/gpu.h" @@ -30,7 +31,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for each 3DS screen struct ScreenInfo { - ImageAlloc display_texture; + ImageAlloc* display_texture = nullptr; Common::Rectangle display_texcoords; TextureInfo texture; vk::Sampler sampler; @@ -38,7 +39,7 @@ struct ScreenInfo { // Uniform data used for presenting the 3DS screens struct PresentUniformData { - std::array modelview; + glm::mat4 modelview; Common::Vec4f i_resolution; Common::Vec4f o_resolution; int screen_id_l = 0; diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index 8fcdfcd62..ba9bb6f86 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -44,7 +44,8 @@ constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) { if (aspect & vk::ImageAspectFlagBits::eDepth) { return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment; } else { - return usage | vk::ImageUsageFlagBits::eColorAttachment; + return usage | vk::ImageUsageFlagBits::eStorage | + vk::ImageUsageFlagBits::eColorAttachment; } }; @@ -59,7 +60,8 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) if (aspect & vk::ImageAspectFlagBits::eDepth) { return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment; } else { - return usage | vk::FormatFeatureFlagBits::eColorAttachment; + return usage | vk::FormatFeatureFlagBits::eStorageImage | + vk::FormatFeatureFlagBits::eColorAttachment; } }; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 371338ffc..a5958c484 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -51,11 +51,12 @@ Instance::Instance(Frontend::EmuWindow& window) { surface = CreateSurface(instance, window); // TODO: GPU select dialog - physical_device = instance.enumeratePhysicalDevices()[0]; - device_limits = physical_device.getProperties().limits; + auto physical_devices = instance.enumeratePhysicalDevices(); + physical_device = physical_devices[0]; + device_properties = physical_device.getProperties(); // Create logical device - CreateDevice(true); + CreateDevice(false); } Instance::~Instance() { @@ -154,11 +155,6 @@ bool Instance::CreateDevice(bool validation_enabled) { return false; } - // List available device extensions - for (const auto& extension : extension_list) { - LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName); - } - // Helper lambda for adding extensions std::array enabled_extensions; u32 enabled_extension_count = 0; @@ -223,7 +219,6 @@ bool Instance::CreateDevice(bool validation_enabled) { static constexpr float queue_priorities[] = {1.0f}; - const std::array layers = {"VK_LAYER_KHRONOS_validation"}; const std::array queue_infos = { vk::DeviceQueueCreateInfo{ .queueFamilyIndex = graphics_queue_family_index, @@ -249,12 +244,6 @@ bool Instance::CreateDevice(bool validation_enabled) { device_info.queueCreateInfoCount = 2; } - // Enable debug layer on debug builds - if (validation_enabled) { - device_info.enabledLayerCount = static_cast(layers.size()); - device_info.ppEnabledLayerNames = layers.data(); - } - // Create logical device device = physical_device.createDevice(device_info); VULKAN_HPP_DEFAULT_DISPATCHER.init(device); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d4b47bf99..a7ab34ed9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -98,7 +98,7 @@ public: /// Returns the minimum required alignment for uniforms vk::DeviceSize UniformMinAlignment() const { - return device_limits.minUniformBufferOffsetAlignment; + return device_properties.limits.minUniformBufferOffsetAlignment; } private: @@ -116,7 +116,6 @@ private: vk::PhysicalDevice physical_device; vk::Instance instance; vk::SurfaceKHR surface; - vk::PhysicalDeviceLimits device_limits; vk::PhysicalDeviceProperties device_properties; VmaAllocator allocator; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 74975845c..3b049b31b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -139,6 +139,7 @@ PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, descriptor_dirty.fill(true); LoadDiskCache(); + BuildLayout(); trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex, instance.GetDevice(), ShaderOptimization::Debug); } @@ -239,25 +240,21 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { } void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { - const DescriptorData data = { - .image_info = vk::DescriptorImageInfo{ - .imageView = image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal - } + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal }; - SetBinding(1, binding, data); + SetBinding(1, binding, DescriptorData{image_info}); } void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { - const DescriptorData data = { - .image_info = vk::DescriptorImageInfo{ - .imageView = image_view, - .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal - } + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral }; - SetBinding(3, binding, data); + SetBinding(3, binding, DescriptorData{image_info}); } void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { @@ -370,6 +367,7 @@ void PipelineCache::BuildLayout() { const vk::DescriptorUpdateTemplateCreateInfo template_info = { .descriptorUpdateEntryCount = set.binding_count, .pDescriptorUpdateEntries = update_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, .descriptorSetLayout = descriptor_set_layouts[i] }; @@ -398,7 +396,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { continue; } - shader_stages[i] = vk::PipelineShaderStageCreateInfo{ + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ .stage = ToVkShaderStage(i), .module = shader, .pName = "main" @@ -569,6 +567,8 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { return VK_NULL_HANDLE; } +static_assert(sizeof(vk::DescriptorBufferInfo) == sizeof(VkDescriptorBufferInfo)); + void PipelineCache::BindDescriptorSets() { vk::Device device = instance.GetDevice(); for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { @@ -580,7 +580,7 @@ void PipelineCache::BindDescriptorSets() { }; vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; - device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i].data()); + device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]); descriptor_sets[i] = set; descriptor_dirty[i] = false; @@ -600,6 +600,7 @@ void PipelineCache::LoadDiskCache() { FileUtil::IOFile cache_file{cache_path, "r"}; if (!cache_file.IsOpen()) { LOG_INFO(Render_Vulkan, "No pipeline cache found"); + return; } const u32 cache_file_size = cache_file.GetSize(); diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h index c7d056667..2624ce876 100644 --- a/src/video_core/renderer_vulkan/vk_platform.h +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -22,6 +22,7 @@ #define VULKAN_HPP_NO_CONSTRUCTORS #include +#include "common/assert.h" #include "common/logging/log.h" #include "core/frontend/emu_window.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -41,13 +42,16 @@ inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface"); + UNREACHABLE(); } } #elif VK_USE_PLATFORM_XLIB_KHR if (window_info.type == Frontend::WindowSystemType::X11) { - const vk::XlibSurfaceCreateInfoKHR xlib_ci{{}, - static_cast(window_info.display_connection), - reinterpret_cast(window_info.render_surface)}; + const vk::XlibSurfaceCreateInfoKHR xlib_ci = { + .dpy = static_cast(window_info.display_connection), + .window = reinterpret_cast(window_info.render_surface) + }; + if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) { LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); UNREACHABLE(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0d698c8b9..604d1373b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -124,7 +124,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan default_texture = runtime.Allocate(1, 1, VideoCore::PixelFormat::RGBA8, VideoCore::TextureType::Texture2D); runtime.Transition(scheduler.GetUploadCommandBuffer(), default_texture, - vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1); + vk::ImageLayout::eGeneral, 0, 1); uniform_block_data.lighting_lut_dirty.fill(true); @@ -149,6 +149,14 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize // all descriptor sets even the ones we don't use. Use default_texture for this + const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData); + const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData); + pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size); + pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size); + pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView()); + pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0)); + pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1)); + for (u32 i = 0; i < 4; i++) { pipeline_cache.BindTexture(i, default_texture.image_view); pipeline_cache.BindSampler(i, default_sampler); @@ -584,48 +592,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { surfaces_rect.bottom, surfaces_rect.top)) }; - auto valid_surface = color_surface ? color_surface : depth_surface; - const FramebufferInfo framebuffer_info = { - .color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE, - .depth = depth_surface ? depth_surface->alloc.image_view : VK_NULL_HANDLE, - .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment, - pipeline_info.depth_attachment, false), - .width = valid_surface->GetScaledWidth(), - .height = valid_surface->GetScaledHeight() - }; - - auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{}); - if (new_framebuffer) { - it->second = CreateFramebuffer(framebuffer_info); - } - - ImageAlloc color_alloc = - color_surface ? color_surface->alloc : ImageAlloc{}; - ImageAlloc depth_alloc = - depth_surface ? depth_surface->alloc : ImageAlloc{}; - - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - runtime.Transition(command_buffer, color_alloc, - vk::ImageLayout::eColorAttachmentOptimal, 0, color_alloc.levels); - runtime.Transition(command_buffer, depth_alloc, - vk::ImageLayout::eDepthStencilReadOnlyOptimal, 0, depth_alloc.levels); - - const vk::RenderPassBeginInfo renderpass_begin = { - .renderPass = - renderpass_cache.GetRenderpass(pipeline_info.color_attachment, - pipeline_info.depth_attachment, false), - .framebuffer = it->second, - .renderArea = vk::Rect2D{ - .offset = {static_cast(draw_rect.left), static_cast(draw_rect.bottom)}, - .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()} - }, - - .clearValueCount = 0, - .pClearValues = nullptr - }; - - renderpass_cache.EnterRenderpass(renderpass_begin); - // Sync the viewport pipeline_cache.SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, @@ -659,13 +625,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { uniform_block_data.dirty = true; } - /*bool need_duplicate_texture = false; - auto CheckBarrier = [&need_duplicate_texture, &color_surface](vk::ImageView handle) { - if (color_surface && color_surface->alloc.image_view == handle) { - need_duplicate_texture = true; - } - };*/ - auto CheckBarrier = [this, &color_surface = color_surface](vk::ImageView image_view, u32 texture_index) { if (color_surface && color_surface->alloc.image_view == image_view) { //auto temp_tex = backend->CreateTexture(texture->GetInfo()); @@ -676,6 +635,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { } }; + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + // Sync and bind the texture surfaces const auto pica_textures = regs.texturing.GetTextures(); for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { @@ -847,6 +808,47 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Enable scissor test to prevent drawing outside of the framebuffer region pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(), draw_rect.GetHeight()); + auto valid_surface = color_surface ? color_surface : depth_surface; + const FramebufferInfo framebuffer_info = { + .color = color_surface ? color_surface->alloc.image_view : VK_NULL_HANDLE, + .depth = depth_surface ? depth_surface->alloc.image_view : VK_NULL_HANDLE, + .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .width = valid_surface->GetScaledWidth(), + .height = valid_surface->GetScaledHeight() + }; + + auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{}); + if (new_framebuffer) { + it->second = CreateFramebuffer(framebuffer_info); + } + + if (color_surface) { + runtime.Transition(command_buffer, color_surface->alloc, + vk::ImageLayout::eColorAttachmentOptimal, + 0, color_surface->alloc.levels); + } + + if (depth_surface) { + runtime.Transition(command_buffer, depth_surface->alloc, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + 0, depth_surface->alloc.levels); + } + + const vk::RenderPassBeginInfo renderpass_begin = { + .renderPass = + renderpass_cache.GetRenderpass(pipeline_info.color_attachment, + pipeline_info.depth_attachment, false), + .framebuffer = it->second, + .renderArea = vk::Rect2D{ + .offset = {static_cast(draw_rect.left), static_cast(draw_rect.bottom)}, + .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()} + }, + + .clearValueCount = 0, + .pClearValues = nullptr + }; + // Draw the vertex batch bool succeeded = true; if (accelerate) { @@ -854,6 +856,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { } else { pipeline_cache.UseTrivialVertexShader(); pipeline_cache.UseTrivialGeometryShader(); + pipeline_cache.BindPipeline(pipeline_info); // Bind the vertex buffer at the current mapped offset. This effectively means // that when base_vertex is zero the GPU will start drawing from the current mapped @@ -872,8 +875,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); vertex_buffer.Commit(vertex_size); - pipeline_cache.BindPipeline(pipeline_info); + renderpass_cache.EnterRenderpass(renderpass_begin); command_buffer.draw(vertices, 1, base_vertex, 0); + renderpass_cache.ExitRenderpass(); } } @@ -1582,7 +1586,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - screen_info.display_texture = src_surface->alloc; + screen_info.display_texture = &src_surface->alloc; return true; } diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index af82c3a30..8e9293efb 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -109,9 +109,9 @@ void RenderpassCache::CreatePresentRenderpass(vk::Format format) { vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, bool is_clear) const { const u32 color_index = - color == VideoCore::PixelFormat::Invalid ? 0 : static_cast(color); + color == VideoCore::PixelFormat::Invalid ? 0 : (static_cast(color) + 1); const u32 depth_index = - depth == VideoCore::PixelFormat::Invalid ? 0 : (static_cast(depth) - 13); + depth == VideoCore::PixelFormat::Invalid ? 0 : (static_cast(depth) - 14); ASSERT(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS); return cached_renderpasses[color_index][depth_index][is_clear]; diff --git a/src/video_core/renderer_vulkan/vk_shader.cpp b/src/video_core/renderer_vulkan/vk_shader.cpp index 156b78d7c..ce0038328 100644 --- a/src/video_core/renderer_vulkan/vk_shader.cpp +++ b/src/video_core/renderer_vulkan/vk_shader.cpp @@ -7,8 +7,8 @@ #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_shader.h" #include -#include #include +#include namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 6940f3017..e2a151b8b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -36,7 +36,7 @@ struct LightSrc { float dist_atten_scale; }; -layout (std140) uniform shader_data { +layout (set = 0, binding = 1, std140) uniform shader_data { int framebuffer_scale; int alphatest_ref; float depth_scale; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 8b610643a..11f39712f 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -154,25 +154,27 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { } void StreamBuffer::Commit(u32 size) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + if (size > 0) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); - const vk::BufferMemoryBarrier buffer_barrier = { - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = access_mask, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buffer, - .offset = buffer_offset, - .size = size - }; + auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); + const vk::BufferMemoryBarrier buffer_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buffer, + .offset = buffer_offset, + .size = size + }; - command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, - vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, + vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); - buffer_offset += size; - available_size -= size; + buffer_offset += size; + available_size -= size; + } } void StreamBuffer::Flush() { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 06f089186..cae20ccac 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -5,6 +5,7 @@ #pragma once #include #include +#include #include "common/assert.h" #include "video_core/renderer_vulkan/vk_common.h" diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 80c88609c..d3e397ef4 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -84,7 +84,9 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) { device.destroyImageView(image.image_view); device.destroyFramebuffer(image.framebuffer); } + swapchain_images.clear(); + swapchain_images.resize(images.size()); std::ranges::transform(images, swapchain_images.begin(), [&](vk::Image image) -> Image { const vk::ImageViewCreateInfo view_info = { diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index 066ba2f5b..b5557bc2f 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -11,7 +11,6 @@ namespace Vulkan { TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} { - vk::Device device = instance.GetDevice(); const vk::CommandPoolCreateInfo command_pool_info = { .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index d3941a7a4..aba6aade0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -191,7 +191,8 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma .image = image, .image_view = image_view, .allocation = allocation, - .levels = levels + .aspect = aspect, + .levels = levels, }; } @@ -200,7 +201,11 @@ void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload, const VideoCore::SurfaceType type = VideoCore::GetFormatType(format); const vk::FormatFeatureFlagBits feature = ToVkFormatFeatures(type); if (!instance.IsFormatSupported(ToVkFormat(format), feature)) { - LOG_CRITICAL(Render_Vulkan, "Unimplemented format converion!"); + if (format == VideoCore::PixelFormat::RGB8 && upload) { + return Pica::Texture::ConvertBGRToRGBA(source, dest); + } + + LOG_CRITICAL(Render_Vulkan, "Unimplemented converion for format {}!", format); UNREACHABLE(); } } @@ -431,6 +436,11 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al info.access = vk::AccessFlagBits::eTransferWrite; info.stage = vk::PipelineStageFlagBits::eTransfer; break; + case vk::ImageLayout::eGeneral: + info.access = vk::AccessFlagBits::eInputAttachmentRead; + info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput | + vk::PipelineStageFlagBits::eFragmentShader; + break; default: LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); UNREACHABLE(); @@ -467,7 +477,9 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} { - alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); + if (params.pixel_format != VideoCore::PixelFormat::Invalid) { + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, texture_type); + } } Surface::~Surface() { diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index f308a7631..4ce5ae1e5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -44,6 +44,9 @@ public: RenderpassCache& renderpass_cache); ~TextureRuntime(); + TextureRuntime(const TextureRuntime&) = delete; + TextureRuntime& operator=(const TextureRuntime&) = delete; + /// Maps an internal staging buffer of the provided size of pixel uploads/downloads StagingData FindStaging(u32 size, bool upload); @@ -128,8 +131,8 @@ private: }; struct Traits { - using Runtime = TextureRuntime; - using Surface = Surface; + using RuntimeType = TextureRuntime; + using SurfaceType = Surface; }; using RasterizerCache = VideoCore::RasterizerCache; diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index 940747b4d..feb48ff85 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -226,7 +226,7 @@ void ConvertBGRToRGB(std::span source, std::span des for (std::size_t i = 0; i < source.size(); i += 3) { u32 bgr{}; std::memcpy(&bgr, source.data() + i, 3); - const u32 rgb = std::byteswap(bgr << 8); + const u32 rgb = Common::swap32(bgr << 8); std::memcpy(dest.data(), &rgb, 3); } } @@ -245,7 +245,7 @@ void ConvertBGRToRGBA(std::span source, std::span de void ConvertABGRToRGBA(std::span source, std::span dest) { for (u32 i = 0; i < source.size(); i += 4) { const u32 abgr = *reinterpret_cast(source.data() + i); - const u32 rgba = std::byteswap(abgr); + const u32 rgba = Common::swap32(abgr); std::memcpy(dest.data() + i, &rgba, 4); } } diff --git a/src/web_service/verify_user_jwt.cpp b/src/web_service/verify_user_jwt.cpp index 27e08db9e..47c648f72 100644 --- a/src/web_service/verify_user_jwt.cpp +++ b/src/web_service/verify_user_jwt.cpp @@ -3,8 +3,8 @@ // Refer to the license.txt file included. #include -#include #include "common/logging/log.h" +#include #include "common/web_result.h" #include "web_service/verify_user_jwt.h" #include "web_service/web_backend.h"