From dade6e679741f2802b1d63d2d690a12b66b790fd Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Wed, 10 Aug 2022 17:38:56 +0300 Subject: [PATCH] video_core: Cleanups and fixes --- src/video_core/CMakeLists.txt | 9 - src/video_core/common/backend.h | 5 +- src/video_core/common/pipeline.h | 47 +- src/video_core/common/pipeline_cache.cpp | 13 +- src/video_core/common/pipeline_cache.h | 10 +- src/video_core/common/rasterizer.cpp | 51 +- src/video_core/common/rasterizer_cache.cpp | 4 +- src/video_core/common/renderer.cpp | 3 +- src/video_core/common/shader_runtime_cache.h | 3 +- src/video_core/common/texture.h | 1 + .../gl_format_reinterpreter.cpp | 2 + .../renderer_opengl/gl_format_reinterpreter.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 16 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 4 + .../renderer_opengl/gl_shader_decompiler.cpp | 13 +- .../renderer_opengl/gl_shader_decompiler.h | 2 +- .../renderer_opengl/gl_shader_disk_cache.cpp | 1 - .../renderer_opengl/gl_shader_disk_cache.h | 19 +- .../renderer_opengl/gl_shader_manager.cpp | 13 +- .../renderer_opengl/gl_shader_manager.h | 77 +- .../renderer_opengl/renderer_opengl.cpp | 5 +- .../renderer_opengl/renderer_opengl.h | 2 +- .../renderer_opengl/texture_downloader_es.cpp | 10 +- src/video_core/renderer_vulkan/vk_backend.cpp | 26 +- src/video_core/renderer_vulkan/vk_backend.h | 24 +- .../renderer_vulkan/vk_pipeline.cpp | 112 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 +- .../renderer_vulkan/vk_rasterizer.cpp | 1629 ----------------- .../renderer_vulkan/vk_rasterizer.h | 285 --- .../renderer_vulkan/vk_rasterizer_cache.cpp | 1513 --------------- .../renderer_vulkan/vk_rasterizer_cache.h | 346 ---- .../renderer_vulkan/vk_shader_gen.cpp | 21 +- .../renderer_vulkan/vk_shader_gen.h | 12 +- .../renderer_vulkan/vk_shader_state.h | 76 - src/video_core/renderer_vulkan/vk_state.cpp | 700 ------- src/video_core/renderer_vulkan/vk_state.h | 180 -- .../renderer_vulkan/vk_surface_params.cpp | 171 -- 
.../renderer_vulkan/vk_surface_params.h | 270 --- src/video_core/renderer_vulkan/vk_texture.cpp | 92 +- src/video_core/renderer_vulkan/vk_texture.h | 2 + 40 files changed, 358 insertions(+), 5449 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.h delete mode 100644 src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_rasterizer_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_shader_state.h delete mode 100644 src/video_core/renderer_vulkan/vk_state.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_state.h delete mode 100644 src/video_core/renderer_vulkan/vk_surface_params.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_surface_params.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 29b84c76c..b25115374 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -95,8 +95,6 @@ add_library(video_core STATIC renderer_opengl/gl_format_reinterpreter.cpp renderer_opengl/gl_format_reinterpreter.h renderer_vulkan/pica_to_vulkan.h - renderer_vulkan/renderer_vulkan.cpp - renderer_vulkan/renderer_vulkan.h renderer_vulkan/vk_backend.cpp renderer_vulkan/vk_backend.h renderer_vulkan/vk_buffer.cpp @@ -114,17 +112,10 @@ add_library(video_core STATIC renderer_vulkan/vk_pipeline.cpp renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_platform.h - renderer_vulkan/vk_rasterizer_cache.cpp - renderer_vulkan/vk_rasterizer_cache.h - renderer_vulkan/vk_rasterizer.cpp - renderer_vulkan/vk_rasterizer.h - renderer_vulkan/vk_shader_state.h renderer_vulkan/vk_shader_gen.cpp renderer_vulkan/vk_shader_gen.h renderer_vulkan/vk_shader.cpp renderer_vulkan/vk_shader.h - renderer_vulkan/vk_surface_params.cpp - renderer_vulkan/vk_surface_params.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_task_scheduler.cpp diff --git 
a/src/video_core/common/backend.h b/src/video_core/common/backend.h index 534f577b2..d7a93c34f 100644 --- a/src/video_core/common/backend.h +++ b/src/video_core/common/backend.h @@ -16,7 +16,7 @@ namespace VideoCore { // A piece of information the video frontend can query the backend about enum class Query { - PresentFormat = 0 + UniformAlignment = 0, }; // Common interface of a video backend @@ -37,6 +37,9 @@ public: // Asks the driver about a particular piece of information virtual u64 QueryDriver(Query query) = 0; + // Returns the hash of the pipeline info struct accounting for dynamic states + virtual u64 PipelineInfoHash(const PipelineInfo& info) = 0; + // Creates a backend specific texture handle virtual TextureHandle CreateTexture(TextureInfo info) = 0; diff --git a/src/video_core/common/pipeline.h b/src/video_core/common/pipeline.h index 39e439bae..624d25cb2 100644 --- a/src/video_core/common/pipeline.h +++ b/src/video_core/common/pipeline.h @@ -62,19 +62,23 @@ union RasterizationState { BitField<4, 2, Pica::CullMode> cull_mode; }; -union DepthStencilState { - u64 value = 0; - BitField<0, 1, u64> depth_test_enable; - BitField<1, 1, u64> depth_write_enable; - BitField<2, 1, u64> stencil_test_enable; - BitField<3, 3, Pica::CompareFunc> depth_compare_op; - BitField<6, 3, Pica::StencilAction> stencil_fail_op; - BitField<9, 3, Pica::StencilAction> stencil_pass_op; - BitField<12, 3, Pica::StencilAction> stencil_depth_fail_op; - BitField<15, 3, Pica::CompareFunc> stencil_compare_op; - BitField<18, 8, u64> stencil_reference; - BitField<26, 8, u64> stencil_compare_mask; - BitField<34, 8, u64> stencil_write_mask; +struct DepthStencilState { + union { + u32 value = 0; + BitField<0, 1, u32> depth_test_enable; + BitField<1, 1, u32> depth_write_enable; + BitField<2, 1, u32> stencil_test_enable; + BitField<3, 3, Pica::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::StencilAction> stencil_pass_op; + BitField<12,
3, Pica::StencilAction> stencil_depth_fail_op; + BitField<15, 3, Pica::CompareFunc> stencil_compare_op; + }; + + // These are dynamic on most graphics APIs so keep them separate + u8 stencil_reference; + u8 stencil_compare_mask; + u8 stencil_write_mask; }; union BlendState { @@ -131,14 +135,10 @@ struct PipelineInfo { VertexLayout vertex_layout{}; PipelineLayoutInfo layout{}; BlendState blending{}; - DepthStencilState depth_stencil{}; - RasterizationState rasterization{}; TextureFormat color_attachment = TextureFormat::RGBA8; TextureFormat depth_attachment = TextureFormat::D24S8; - - const u64 Hash() const { - return Common::ComputeStructHash64(*this); - } + RasterizationState rasterization{}; + DepthStencilState depth_stencil{}; }; #pragma pack() @@ -186,12 +186,3 @@ protected: using PipelineHandle = IntrusivePtr; } // namespace VideoCore - -namespace std { -template <> -struct hash { - std::size_t operator()(const VideoCore::PipelineInfo& info) const noexcept { - return info.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/common/pipeline_cache.cpp b/src/video_core/common/pipeline_cache.cpp index 1d263c892..5d26fb661 100644 --- a/src/video_core/common/pipeline_cache.cpp +++ b/src/video_core/common/pipeline_cache.cpp @@ -1,14 +1,15 @@ -// Copyright 2018 Citra Emulator Project +// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
#include #include -#include +#include #include "core/frontend/scope_acquire_context.h" #include "video_core/common/pipeline_cache.h" #include "video_core/common/shader.h" #include "video_core/common/shader_gen.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" #include "video_core/video_core.h" namespace VideoCore { @@ -41,7 +42,8 @@ PipelineCache::PipelineCache(Frontend::EmuWindow& emu_window, std::unique_ptr(); } PipelineHandle PipelineCache::GetPipeline(PipelineInfo& info) { @@ -51,7 +53,8 @@ PipelineHandle PipelineCache::GetPipeline(PipelineInfo& info) { info.shaders[static_cast(ProgramType::FragmentShader)] = current_fragment_shader; // Search cache - if (auto iter = cached_pipelines.find(info); iter != cached_pipelines.end()) { + const u64 pipeline_hash = backend->PipelineInfoHash(info); + if (auto iter = cached_pipelines.find(pipeline_hash); iter != cached_pipelines.end()) { return iter->second; } @@ -309,7 +312,7 @@ void PipelineCache::LoadDiskCache(const std::atomic_bool& stop_loading, const Di const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size}; // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); + //contexts[i] = emu_window.CreateSharedContext(); threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end); } diff --git a/src/video_core/common/pipeline_cache.h b/src/video_core/common/pipeline_cache.h index be0e7b6e7..64ecc7734 100644 --- a/src/video_core/common/pipeline_cache.h +++ b/src/video_core/common/pipeline_cache.h @@ -58,13 +58,13 @@ private: std::unique_ptr& backend; std::unique_ptr generator; - // Keeps all the compiled graphics pipelines - std::unordered_map cached_pipelines; + // Keeps all the compiled graphics pipelines. 
The hash is decided by the backend + std::unordered_map cached_pipelines; // Current shaders - ShaderHandle current_vertex_shader; - ShaderHandle current_geometry_shader; - ShaderHandle current_fragment_shader; + ShaderHandle current_vertex_shader{}; + ShaderHandle current_geometry_shader{}; + ShaderHandle current_fragment_shader{}; // Pica runtime shader caches PicaVertexShaders pica_vertex_shaders; diff --git a/src/video_core/common/rasterizer.cpp b/src/video_core/common/rasterizer.cpp index 3f02bb3a3..094478a54 100644 --- a/src/video_core/common/rasterizer.cpp +++ b/src/video_core/common/rasterizer.cpp @@ -184,12 +184,10 @@ Rasterizer::Rasterizer(Frontend::EmuWindow& emu_window, std::unique_ptrCreateBuffer(TEXEL_BUFFER_INFO); texel_buffer_lut_lf = backend->CreateBuffer(TEXEL_BUFFER_LF_INFO); - /*glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); - uniform_size_aligned_vs = - Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); - uniform_size_aligned_fs = - Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); - */ + // TODO: Have the backend say this + uniform_buffer_alignment = 64; + uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); // Create pipeline cache pipeline_cache = std::make_unique(emu_window, backend); @@ -587,16 +585,14 @@ bool Rasterizer::Draw(bool accelerate, bool is_indexed) { viewport_rect_unscaled.bottom * res_scale, surfaces_rect.bottom, surfaces_rect.top))}; // Bottom - // Retrive the framebuffer assigned to the surfaces + // Retrieve the framebuffer assigned to the surfaces and update raster_info FramebufferHandle framebuffer = res_cache.GetFramebuffer(color_surface, depth_surface); - - // Sync the viewport - const Rect2D viewport = { - static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale, - static_cast(surfaces_rect.bottom) + 
viewport_rect_unscaled.bottom * res_scale, - static_cast(viewport_rect_unscaled.GetWidth() * res_scale), - static_cast(viewport_rect_unscaled.GetHeight() * res_scale) - }; + raster_info.color_attachment = framebuffer->GetColorAttachment().IsValid() ? + framebuffer->GetColorAttachment()->GetFormat() : + TextureFormat::Undefined; + raster_info.depth_attachment = framebuffer->GetDepthStencilAttachment().IsValid() ? + framebuffer->GetDepthStencilAttachment()->GetFormat() : + TextureFormat::Undefined; if (uniform_block_data.data.framebuffer_scale != res_scale) { uniform_block_data.data.framebuffer_scale = res_scale; @@ -637,8 +633,12 @@ bool Rasterizer::Draw(bool accelerate, bool is_indexed) { shader_dirty = false; } + // Sync the viewport PipelineHandle raster_pipeline = pipeline_cache->GetPipeline(raster_info); - raster_pipeline->SetViewport(viewport); + raster_pipeline->SetViewport(surfaces_rect.left + viewport_rect_unscaled.left * res_scale, + surfaces_rect.bottom + viewport_rect_unscaled.bottom * res_scale, + viewport_rect_unscaled.GetWidth() * res_scale, + viewport_rect_unscaled.GetHeight() * res_scale); // Checks if the game is trying to use a surface as a texture and framebuffer at the same time // which causes unpredictable behavior on the host. @@ -803,16 +803,9 @@ bool Rasterizer::Draw(bool accelerate, bool is_indexed) { // Sync the uniform data UploadUniforms(raster_pipeline, accelerate); - const Common::Rectangle scissor = { - draw_rect.left, - draw_rect.bottom, - draw_rect.GetWidth(), - draw_rect.GetHeight() - }; - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. 
// Enable scissor test to prevent drawing outside of the framebuffer region - raster_pipeline->SetScissor(scissor); + raster_pipeline->SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(), draw_rect.GetHeight()); // Draw the vertex batch bool succeeded = true; @@ -1686,15 +1679,15 @@ void Rasterizer::SyncColorWriteMask() { void Rasterizer::SyncStencilWriteMask() { const auto& regs = Pica::g_state.regs; - raster_info.depth_stencil.stencil_write_mask.Assign( + raster_info.depth_stencil.stencil_write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0); + : 0; } void Rasterizer::SyncDepthWriteMask() { const auto& regs = Pica::g_state.regs; - raster_info.depth_stencil.stencil_write_mask.Assign( + raster_info.depth_stencil.depth_write_enable.Assign( (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && regs.framebuffer.output_merger.depth_write_enable)); } @@ -1708,8 +1701,8 @@ void Rasterizer::SyncStencilTest() { raster_info.depth_stencil.stencil_pass_op.Assign(regs.framebuffer.output_merger.stencil_test.action_depth_pass); raster_info.depth_stencil.stencil_depth_fail_op.Assign(regs.framebuffer.output_merger.stencil_test.action_depth_fail); raster_info.depth_stencil.stencil_compare_op.Assign(regs.framebuffer.output_merger.stencil_test.func); - raster_info.depth_stencil.stencil_reference.Assign(regs.framebuffer.output_merger.stencil_test.reference_value); - raster_info.depth_stencil.stencil_write_mask.Assign(regs.framebuffer.output_merger.stencil_test.input_mask); + raster_info.depth_stencil.stencil_reference = regs.framebuffer.output_merger.stencil_test.reference_value; + raster_info.depth_stencil.stencil_compare_mask = regs.framebuffer.output_merger.stencil_test.input_mask; /* input_mask feeds the stencil comparison; stencil_write_mask is set only by SyncStencilWriteMask */ } void Rasterizer::SyncDepthTest() { diff --git a/src/video_core/common/rasterizer_cache.cpp b/src/video_core/common/rasterizer_cache.cpp index 478ca529e..115715a8d 100644 ---
a/src/video_core/common/rasterizer_cache.cpp +++ b/src/video_core/common/rasterizer_cache.cpp @@ -1456,7 +1456,7 @@ bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, const bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, const SurfaceInterval& interval) { - auto [cvt_begin, cvt_end] = + /*auto [cvt_begin, cvt_end] = format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format); for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) { PixelFormat format = reinterpreter->first.src_format; @@ -1498,7 +1498,7 @@ bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, Surface } return true; } - } + }*/ return false; } diff --git a/src/video_core/common/renderer.cpp b/src/video_core/common/renderer.cpp index d7b1fc2bd..726b4b6f7 100644 --- a/src/video_core/common/renderer.cpp +++ b/src/video_core/common/renderer.cpp @@ -165,11 +165,10 @@ DisplayRenderer::DisplayRenderer(Frontend::EmuWindow& window) : render_window(wi &fragment_shader_anaglyph_source, &fragment_shader_interlaced_source}; - const auto color_format = static_cast(backend->QueryDriver(Query::PresentFormat)); PipelineInfo present_pipeline_info = { .vertex_layout = ScreenRectVertex::GetVertexLayout(), .layout = RENDERER_PIPELINE_INFO, - .color_attachment = color_format, + .color_attachment = TextureFormat::PresentColor, .depth_attachment = TextureFormat::Undefined }; diff --git a/src/video_core/common/shader_runtime_cache.h b/src/video_core/common/shader_runtime_cache.h index f6758cbf1..f674f33ec 100644 --- a/src/video_core/common/shader_runtime_cache.h +++ b/src/video_core/common/shader_runtime_cache.h @@ -4,9 +4,10 @@ #pragma once -#include +#include #include #include +#include #include "video_core/common/backend.h" #include "video_core/common/shader_gen.h" diff --git a/src/video_core/common/texture.h b/src/video_core/common/texture.h index fe911735e..31d4cc9bd 100644 --- 
a/src/video_core/common/texture.h +++ b/src/video_core/common/texture.h @@ -24,6 +24,7 @@ enum class TextureFormat : u8 { D16 = 5, D24 = 6, D24S8 = 7, + PresentColor = 8, // Backend specific swapchain format Undefined = 255 }; diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index e09efe6eb..0aec38123 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -385,6 +385,8 @@ FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { std::make_unique()); } +FormatReinterpreterOpenGL::~FormatReinterpreterOpenGL() = default; + std::pair FormatReinterpreterOpenGL::GetPossibleReinterpretations(PixelFormat dst_format) { diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h index 3ef7e2886..d4b544096 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.h @@ -50,7 +50,7 @@ class FormatReinterpreterOpenGL : NonCopyable { public: explicit FormatReinterpreterOpenGL(); - ~FormatReinterpreterOpenGL() = default; + ~FormatReinterpreterOpenGL(); std::pair GetPossibleReinterpretations( SurfaceParams::PixelFormat dst_format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d66045818..9ccce827b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -43,13 +43,10 @@ static bool IsVendorAmd() { const std::string_view gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; return gpu_vendor == "ATI Technologies Inc." 
|| gpu_vendor == "Advanced Micro Devices, Inc."; } - -#ifdef __APPLE__ static bool IsVendorIntel() { std::string gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; return gpu_vendor == "Intel Inc."; } -#endif RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window) : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), @@ -414,7 +411,7 @@ bool RasterizerOpenGL::SetupGeometryShader() { MICROPROFILE_SCOPE(OpenGL_GS); const auto& regs = Pica::g_state.regs; - if (regs.pipeline.use_gs != Pica::UseGS::No) { + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); return false; } @@ -761,9 +758,8 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { texture_cube_sampler.SyncWithConfig(texture.config); state.texture_units[texture_index].texture_2d = 0; continue; // Texture unit 0 setup finished. Continue to next unit - default: - state.texture_cube_unit.texture_cube = 0; } + state.texture_cube_unit.texture_cube = 0; } texture_samplers[texture_index].SyncWithConfig(texture.config); @@ -786,6 +782,12 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } } + if (color_surface->pixel_format == SurfaceParams::PixelFormat::RGB5A1 || + color_surface->pixel_format == SurfaceParams::PixelFormat::RGB565 || + color_surface->pixel_format == SurfaceParams::PixelFormat::RGBA4) { + LOG_WARNING(Render_OpenGL, "Render target with unsupported format!\n"); + } + OGLTexture temp_tex; if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) { // The game is trying to use a surface as a texture and framebuffer at the same time @@ -802,7 +804,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glBindTexture(GL_TEXTURE_2D, state.texture_units[0].texture_2d); - for (std::size_t level{0}; level <= color_surface->max_level; ++level) { + for (u32 level{0}; 
level <= color_surface->max_level; ++level) { glCopyImageSubData(color_surface->texture.handle, GL_TEXTURE_2D, level, 0, 0, 0, temp_tex.handle, GL_TEXTURE_2D, level, 0, 0, 0, color_surface->GetScaledWidth() >> level, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bece76a17..3f2714442 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -791,6 +791,10 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_ const FormatTuple& tuple = GetFormatTuple(pixel_format); GLuint target_tex = texture.handle; + if (addr == 0x1829bfc0) { + LOG_ERROR(Render_OpenGL, "Framebuffer upload!"); + } + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in // surface OGLTexture unscaled_tex; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a6e1eb156..b285f4d78 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -9,18 +9,19 @@ #include #include #include +#include #include "common/assert.h" #include "common/common_types.h" -#include "video_core/shader_compiler/frontend/opcode.h" -#include "video_core/shader_compiler/frontned/instruction.h" -#include "video_core/shader_compiler/frontend/register.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -using Pica::Shader::OpCode; -using Pica::Shader::DestRegister; - namespace OpenGL::ShaderDecompiler { +using nihstro::Instruction; +using nihstro::OpCode; +using nihstro::RegisterType; +using nihstro::SourceRegister; +using nihstro::SwizzlePattern; + constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH; class DecompileFail : public std::runtime_error { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h 
b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 527efca24..453edf956 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -9,7 +9,7 @@ #include #include #include "common/common_types.h" -#include "video_core/shader_compiler/shader.h" +#include "video_core/shader/shader.h" namespace OpenGL::ShaderDecompiler { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index f427bb4c8..1a3ea1d73 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -66,7 +66,6 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { if (file.ReadBytes(&code_len, sizeof(u64)) != sizeof(u64)) { return false; } - program_code.resize(code_len); if (file.ReadArray(program_code.data(), code_len) != code_len) { return false; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index a8d3f10d3..5a2faeb9a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -41,17 +41,16 @@ using ProgramCode = std::vector; using ShaderDecompiledMap = std::unordered_map; using ShaderDumpsMap = std::unordered_map; -// Describes a shader how it's used by the guest GPU +/// Describes a shader how it's used by the guest GPU class ShaderDiskCacheRaw { public: + explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, + RawShaderConfig config, ProgramCode program_code); ShaderDiskCacheRaw() = default; - ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, - Pica::Regs config, std::vector program_code) : - unique_identifier(unique_identifier), program_type(program_type), config(config), - program_code(program_code) {} ~ShaderDiskCacheRaw() = default; bool Load(FileUtil::IOFile& file); + bool 
Save(FileUtil::IOFile& file) const; u64 GetUniqueIdentifier() const { @@ -62,19 +61,19 @@ public: return program_type; } - const std::vector& GetProgramCode() const { + const ProgramCode& GetProgramCode() const { return program_code; } - const Pica::Regs& GetRawShaderConfig() const { + const RawShaderConfig& GetRawShaderConfig() const { return config; } private: - u64 unique_identifier = 0; + u64 unique_identifier{}; ProgramType program_type{}; - Pica::Regs config{}; - std::vector program_code{}; + RawShaderConfig config{}; + ProgramCode program_code{}; }; /// Contains decompiled data from a shader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index af219031c..7f49e9b73 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -175,11 +175,11 @@ public: void Create(const char* source, GLenum type) { if (shader_or_program.which() == 0) { - std::get(shader_or_program).Create(source, type); + boost::get(shader_or_program).Create(source, type); } else { OGLShader shader; shader.Create(source, type); - OGLProgram& program = std::get(shader_or_program); + OGLProgram& program = boost::get(shader_or_program); program.Create(true, {shader.handle}); SetShaderUniformBlockBindings(program.handle); @@ -191,9 +191,9 @@ public: GLuint GetHandle() const { if (shader_or_program.which() == 0) { - return std::get(shader_or_program).handle; + return boost::get(shader_or_program).handle; } else { - return std::get(shader_or_program).handle; + return boost::get(shader_or_program).handle; } } @@ -204,7 +204,7 @@ public: } private: - std::variant shader_or_program; + boost::variant shader_or_program; }; class TrivialVertexShader { @@ -393,7 +393,7 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, // Save VS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; - std::vector 
program_code{setup.program_code.begin(), setup.program_code.end()}; + ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()}; program_code.insert(program_code.end(), setup.swizzle_data.begin(), setup.swizzle_data.end()); const u64 unique_identifier = GetUniqueIdentifier(regs, program_code); @@ -715,7 +715,6 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, contexts[i] = emu_window.CreateSharedContext(); threads[i] = std::thread(LoadRawSepareble, contexts[i].get(), start, end); } - for (auto& thread : threads) { thread.join(); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 895d0eb65..39c1392f2 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -5,7 +5,6 @@ #pragma once #include -#include #include #include "video_core/rasterizer_interface.h" #include "video_core/regs_lighting.h" @@ -23,39 +22,39 @@ namespace OpenGL { enum class UniformBindings : GLuint { Common, VS, GS }; struct LightSrc { - alignas(16) Common::Vec3f specular_0; - alignas(16) Common::Vec3f specular_1; - alignas(16) Common::Vec3f diffuse; - alignas(16) Common::Vec3f ambient; - alignas(16) Common::Vec3f position; - alignas(16) Common::Vec3f spot_direction; // negated - float dist_atten_bias; - float dist_atten_scale; + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; + alignas(16) GLvec3 spot_direction; // negated + GLfloat dist_atten_bias; + GLfloat dist_atten_scale; }; -// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned // NOTE: Always keep a vec4 at the end. 
The GL spec is not clear wether the alignment at // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. // Not following that rule will cause problems on some AMD drivers. struct UniformData { - int framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - float shadow_bias_constant; - float shadow_bias_linear; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - int fog_lut_offset; - int proctex_noise_lut_offset; - int proctex_color_map_offset; - int proctex_alpha_map_offset; - int proctex_lut_offset; - int proctex_diff_lut_offset; - float proctex_bias; - int shadow_texture_bias; + GLint framebuffer_scale; + GLint alphatest_ref; + GLfloat depth_scale; + GLfloat depth_offset; + GLfloat shadow_bias_constant; + GLfloat shadow_bias_linear; + GLint scissor_x1; + GLint scissor_y1; + GLint scissor_x2; + GLint scissor_y2; + GLint fog_lut_offset; + GLint proctex_noise_lut_offset; + GLint proctex_color_map_offset; + GLint proctex_alpha_map_offset; + GLint proctex_lut_offset; + GLint proctex_diff_lut_offset; + GLfloat proctex_bias; + GLint shadow_texture_bias; alignas(16) GLivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; alignas(16) GLvec3 fog_color; alignas(8) GLvec2 proctex_noise_f; @@ -63,29 +62,29 @@ struct UniformData { alignas(8) GLvec2 proctex_noise_p; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; - alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages - alignas(16) Common::Vec4f tev_combiner_buffer_color; - alignas(16) Common::Vec4f clip_coef; + alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; }; -static_assert(sizeof(UniformData) == 0x4F0, - "The size of the UniformData structure has changed, update the structure in the shader"); +static_assert( + sizeof(UniformData) == 0x4F0, + "The size of the UniformData 
structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); - -// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. +/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. // NOTE: the same rule from UniformData also applies here. struct PicaUniformsData { void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); struct BoolAligned { - alignas(16) int b; + alignas(16) GLint b; }; - GLvec4 + std::array bools; - alignas(16) std::array i; - alignas(16) std::array f; + alignas(16) std::array i; + alignas(16) std::array f; }; struct VSUniformData { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 20089a3d0..4829a5b08 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -312,11 +312,10 @@ uniform int reverse_interlaced; void main() { float screen_row = o_resolution.x * frag_tex_coord.x; - if (int(screen_row) % 2 == reverse_interlaced) { + if (int(screen_row) % 2 == reverse_interlaced) color = texture(color_texture, frag_tex_coord); - } else { + else color = texture(color_texture_r, frag_tex_coord); - } } )"; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 1633bf8f0..634d26ca4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -57,7 +57,7 @@ struct PresentationTexture { OGLTexture texture; }; -class RendererOpenGL : public VideoCore::RendererBase { +class RendererOpenGL : public RendererBase { public: explicit RendererOpenGL(Frontend::EmuWindow& window); ~RendererOpenGL() override; diff --git a/src/video_core/renderer_opengl/texture_downloader_es.cpp 
b/src/video_core/renderer_opengl/texture_downloader_es.cpp index 11663512e..1119ad65e 100644 --- a/src/video_core/renderer_opengl/texture_downloader_es.cpp +++ b/src/video_core/renderer_opengl/texture_downloader_es.cpp @@ -50,16 +50,18 @@ void TextureDownloaderES::Test() { state.Apply(); original_data.resize(tex_size * tex_size); - for (std::size_t idx = 0; idx < original_data.size(); ++idx) + for (std::size_t idx = 0; idx < original_data.size(); ++idx) { original_data[idx] = data_generator(idx); - glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_size, tex_size); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_size, tex_size, tuple.format, tuple.type, + } + GLsizei tex_sizei = static_cast(tex_size); + glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_sizei, tex_sizei); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_sizei, tex_sizei, tuple.format, tuple.type, original_data.data()); decltype(original_data) new_data(original_data.size()); glFinish(); auto start = std::chrono::high_resolution_clock::now(); - GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_size, tex_size, + GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_sizei, tex_sizei, new_data.data()); glFinish(); auto time = std::chrono::high_resolution_clock::now() - start; diff --git a/src/video_core/renderer_vulkan/vk_backend.cpp b/src/video_core/renderer_vulkan/vk_backend.cpp index a2073cd9d..9aba97c4b 100644 --- a/src/video_core/renderer_vulkan/vk_backend.cpp +++ b/src/video_core/renderer_vulkan/vk_backend.cpp @@ -106,9 +106,22 @@ Backend::~Backend() { } } +u64 Backend::PipelineInfoHash(const PipelineInfo& info) { + const bool hash_all = !instance.IsExtendedDynamicStateSupported(); + if (hash_all) { + // Don't hash the last three members of DepthStencilState, these are + // dynamic in every Vulkan implementation + return Common::ComputeHash64(&info, offsetof(PipelineInfo, depth_stencil) + + offsetof(DepthStencilState, stencil_reference)); + } else { + // Hash 
everything except depth_stencil and rasterization + return Common::ComputeHash64(&info, offsetof(PipelineInfo, rasterization)); + } +} + /** * To avoid many small heap allocations during handle creation, each resource has a dedicated pool - * associated with it that batch allocates memory. + * associated with it that batch-allocates memory. */ BufferHandle Backend::CreateBuffer(BufferInfo info) { static ObjectPool buffer_pool; @@ -137,15 +150,13 @@ PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) { // Get renderpass vk::RenderPass renderpass = GetRenderPass(info.color_attachment, info.depth_attachment); - // Find a pipeline layout first - if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) { - PipelineLayout& layout = iter->second; - - return PipelineHandle{pipeline_pool.Allocate(instance, layout, type, info, renderpass, cache)}; + // Find an owner first + if (auto iter = pipeline_owners.find(info.layout); iter != pipeline_owners.end()) { + return PipelineHandle{pipeline_pool.Allocate(instance, iter->second, type, info, renderpass, cache)}; } // Create the layout - auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout}); + auto result = pipeline_owners.emplace(info.layout, PipelineOwner{instance, info.layout}); return PipelineHandle{pipeline_pool.Allocate(instance, result.first->second, type, info, renderpass, cache)}; } @@ -155,7 +166,6 @@ SamplerHandle Backend::CreateSampler(SamplerInfo info) { } void Backend::Draw(PipelineHandle pipeline_handle, FramebufferHandle draw_framebuffer, - BufferHandle vertex_buffer, u32 base_vertex, u32 num_vertices) { // Bind descriptor sets vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); diff --git a/src/video_core/renderer_vulkan/vk_backend.h b/src/video_core/renderer_vulkan/vk_backend.h index 2e70795c9..6f4a9eb88 100644 --- a/src/video_core/renderer_vulkan/vk_backend.h +++ 
b/src/video_core/renderer_vulkan/vk_backend.h @@ -16,31 +16,35 @@ namespace VideoCore::Vulkan { class Texture; constexpr u32 RENDERPASS_COUNT = (MAX_COLOR_FORMATS + 1) * (MAX_DEPTH_FORMATS + 1); -constexpr u32 DESCRIPTOR_BANK_SIZE = 64; class Backend final : public VideoCore::BackendBase { public: Backend(Frontend::EmuWindow& window); ~Backend(); - void SwapBuffers() override; + bool BeginPresent() override; + void EndPresent() override; + + FramebufferHandle GetWindowFramebuffer() override; + + u64 QueryDriver(Query query) override; + + u64 PipelineInfoHash(const PipelineInfo& info) override; BufferHandle CreateBuffer(BufferInfo info) override; - FramebufferHandle CreateFramebuffer(FramebufferInfo info) override; - TextureHandle CreateTexture(TextureInfo info) override; - PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) override; - SamplerHandle CreateSampler(SamplerInfo info) override; + ShaderHandle CreateShader(ShaderStage stage, std::string_view name, std::string source) override; + + void BindVertexBuffer(BufferHandle buffer, std::span offsets) override; + void BindIndexBuffer(BufferHandle buffer, AttribType index_type, u32 offset) override; void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, - BufferHandle vertex_buffer, u32 base_vertex, u32 num_vertices) override; void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, - BufferHandle vertex_buffer, BufferHandle index_buffer, AttribType index_type, u32 base_index, u32 num_indices, u32 base_vertex) override; void DispatchCompute(PipelineHandle pipeline, Common::Vec3 groupsize, @@ -73,8 +77,8 @@ private: std::array renderpass_cache; vk::PipelineCache cache; - // Pipeline layout cache - std::unordered_map pipeline_layouts; + // A cache of pipeline owners + std::unordered_map pipeline_owners; // Descriptor pools std::array descriptor_pools; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.cpp b/src/video_core/renderer_vulkan/vk_pipeline.cpp index 
23698065b..9406dcaed 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline.cpp @@ -10,12 +10,10 @@ #include "video_core/renderer_vulkan/vk_texture.h" #include "video_core/renderer_vulkan/vk_buffer.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" namespace VideoCore::Vulkan { -// Maximum binding per descriptor set -constexpr u32 MAX_BINDING_SLOTS = 7; - vk::ShaderStageFlags ToVkStageFlags(BindingType type) { vk::ShaderStageFlags flags; switch (type) { @@ -62,21 +60,21 @@ vk::DescriptorType ToVkDescriptorType(BindingType type) { u32 AttribBytes(VertexAttribute attrib) { switch (attrib.type) { case AttribType::Float: - return sizeof(float) * attrib.components; + return sizeof(float) * attrib.size; case AttribType::Int: - return sizeof(u32) * attrib.components; + return sizeof(u32) * attrib.size; case AttribType::Short: - return sizeof(u16) * attrib.components; + return sizeof(u16) * attrib.size; case AttribType::Byte: case AttribType::Ubyte: - return sizeof(u8) * attrib.components; + return sizeof(u8) * attrib.size; } } vk::Format ToVkAttributeFormat(VertexAttribute attrib) { switch (attrib.type) { case AttribType::Float: - switch (attrib.components) { + switch (attrib.size) { case 1: return vk::Format::eR32Sfloat; case 2: return vk::Format::eR32G32Sfloat; case 3: return vk::Format::eR32G32B32Sfloat; @@ -104,20 +102,20 @@ vk::ShaderStageFlagBits ToVkShaderStage(ShaderStage stage) { } } -PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) : +PipelineOwner::PipelineOwner(Instance& instance, PipelineLayoutInfo info) : instance(instance), set_layout_count(info.group_count) { // Used as temp storage for CreateDescriptorSet - std::array set_bindings; - std::array update_entries; + std::array set_bindings; + std::array update_entries; vk::Device device = instance.GetDevice(); for (u32 set = 0; set < set_layout_count; set++) { auto& 
group = info.binding_groups[set]; u32 binding = 0; - while (group[binding] != BindingType::None) { - const BindingType type = group[binding]; + while (group.Value(binding) != BindingType::None) { + const BindingType type = group.Value(binding); set_bindings[binding] = vk::DescriptorSetLayoutBinding{ .binding = binding, .descriptorType = ToVkDescriptorType(type), @@ -175,7 +173,7 @@ PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) : pipeline_layout = device.createPipelineLayout(layout_info); } -PipelineLayout::~PipelineLayout() { +PipelineOwner::~PipelineOwner() { vk::Device device = instance.GetDevice(); device.destroyPipelineLayout(pipeline_layout); @@ -186,9 +184,10 @@ PipelineLayout::~PipelineLayout() { } } -Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, PipelineInfo info, +Pipeline::Pipeline(Instance& instance, CommandScheduler& scheduler, PipelineOwner& owner, + PipelineType type, PipelineInfo info, vk::RenderPass renderpass, vk::PipelineCache cache) : PipelineBase(type, info), - instance(instance), owner(owner) { + instance(instance), scheduler(scheduler), owner(owner) { vk::Device device = instance.GetDevice(); @@ -209,43 +208,41 @@ Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, }; } - // Create a graphics pipeline if (type == PipelineType::Graphics) { + /** - * Most modern graphics APIs don't natively support constant attributes. To avoid duplicating - * the data and increasing data bandwith, we reserve the last binding for fixed attributes, - * which are always interleaved and specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate. - * Since we are always rendering 1 instance, the shader will always read the single attribute + * Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and increasing + * data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE as the input rate. 
+ * Since 1 instance is all we render, the shader will always read the single attribute. */ - const vk::VertexInputBindingDescription binding_desc = { - .binding = 0, - .stride = info.vertex_layout.stride - }; + std::array bindings; + for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { + const auto& binding = info.vertex_layout.bindings[i]; + bindings[i] = vk::VertexInputBindingDescription{ + .binding = binding.binding, + .stride = binding.stride, + .inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance + : vk::VertexInputRate::eVertex + }; + } // Populate vertex attribute structures - u32 attribute_count = 0; - std::array attribute_desc; - for (u32 i = 0; i < MAX_VERTEX_ATTRIBUTES; i++) { - auto& attr = info.vertex_layout.attributes[i]; - if (attr.components == 0) { - attribute_count = i; - break; - } - - attribute_desc[i] = vk::VertexInputAttributeDescription{ - .location = i, - .binding = 0, + std::array attributes; + for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { + const auto& attr = info.vertex_layout.attributes[i]; + attributes[i] = vk::VertexInputAttributeDescription{ + .location = attr.location, + .binding = attr.binding, .format = ToVkAttributeFormat(attr), - .offset = (i > 0 ? 
attribute_desc[i - 1].offset + - AttribBytes(info.vertex_layout.attributes[i - 1]) : 0) + .offset = attr.offset }; } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &binding_desc, - .vertexAttributeDescriptionCount = attribute_count, - .pVertexAttributeDescriptions = attribute_desc.data() + .vertexBindingDescriptionCount = info.vertex_layout.binding_count, + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, + .pVertexAttributeDescriptions = attributes.data() }; const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { @@ -279,8 +276,8 @@ Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, }; const vk::PipelineColorBlendStateCreateInfo color_blending = { - .logicOpEnable = true, - .logicOp = vk::LogicOp::eCopy, // TODO + .logicOpEnable = info.blending.logic_op_enable.Value(), + .logicOp = PicaToVK::LogicOp(info.blending.logic_op), // TODO .attachmentCount = 1, .pAttachments = &colorblend_attachment, }; @@ -385,10 +382,11 @@ void Pipeline::BindTexture(u32 group, u32 slot, TextureHandle handle) { owner.SetBinding(group, slot, data); } -void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) { +void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 offset, u32 range, u32 view) { Buffer* buffer = static_cast(handle.Get()); // Texel buffers are bound with their views + // TODO: Support variable binding range? if (buffer->GetUsage() == BufferUsage::Texel) { const DescriptorData data = { .buffer_view = buffer->GetView(view) @@ -399,8 +397,8 @@ void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) { const DescriptorData data = { .buffer_info = vk::DescriptorBufferInfo{ .buffer = buffer->GetHandle(), - .offset = 0, - .range = buffer->GetCapacity() + .offset = offset, + .range = (range == WHOLE_SIZE ? 
buffer->GetCapacity() : range) } }; @@ -420,4 +418,22 @@ void Pipeline::BindSampler(u32 group, u32 slot, SamplerHandle handle) { owner.SetBinding(group, slot, data); } +void Pipeline::BindPushConstant(std::span data) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.pushConstants(owner.GetLayout(), + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + 0, data.size(), data.data()); +} + +// Viewport and scissor are always dynamic +void Pipeline::SetViewport(float x, float y, float width, float height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f}); +} + +void Pipeline::SetScissor(s32 x, s32 y, u32 width, u32 height) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}}); +} + } // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h index 549b0a71e..a8b67b28b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -14,7 +14,7 @@ class Instance; class CommandScheduler; union DescriptorData { - vk::DescriptorImageInfo image_info{}; + vk::DescriptorImageInfo image_info; vk::DescriptorBufferInfo buffer_info; vk::BufferView buffer_view; }; @@ -24,18 +24,19 @@ union DescriptorData { * and update templates associated with those layouts. 
* Functions as the "parent" to a group of pipelines that share the same layout */ -class PipelineLayout { +class PipelineOwner { public: - PipelineLayout(Instance& instance, PipelineLayoutInfo info); - ~PipelineLayout(); + PipelineOwner(Instance& instance, PipelineLayoutInfo info); + ~PipelineOwner(); // Disable copy constructor - PipelineLayout(const PipelineLayout&) = delete; - PipelineLayout& operator=(const PipelineLayout&) = delete; + PipelineOwner(const PipelineOwner&) = delete; + PipelineOwner& operator=(const PipelineOwner&) = delete; // Assigns data to a particular binding void SetBinding(u32 set, u32 binding, DescriptorData data) { update_data[set][binding] = data; + descriptor_dirty[set] = true; } // Returns the number of descriptor set layouts @@ -68,26 +69,32 @@ private: u32 set_layout_count = 0; std::array set_layouts; std::array update_templates; + std::array descriptor_bank; // Update data for the descriptor sets using SetData = std::array; - std::array update_data; + std::array update_data{}; + std::array descriptor_dirty{true}; }; class Pipeline : public VideoCore::PipelineBase { public: - Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, PipelineInfo info, + Pipeline(Instance& instance, CommandScheduler& scheduler, PipelineOwner& owner, + PipelineType type, PipelineInfo info, vk::RenderPass renderpass, vk::PipelineCache cache); ~Pipeline() override; - void BindTexture(u32 group, u32 slot, TextureHandle handle) override; + virtual void BindTexture(u32 group, u32 slot, TextureHandle handle) override; + virtual void BindBuffer(u32 group, u32 slot, BufferHandle handle, + u32 offset = 0, u32 range = WHOLE_SIZE, u32 view = 0) override; + virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) override; + virtual void BindPushConstant(std::span data) override; - void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) override; - - void BindSampler(u32 group, u32 slot, SamplerHandle handle) override; + 
virtual void SetViewport(float x, float y, float width, float height) override; + virtual void SetScissor(s32 x, s32 y, u32 width, u32 height) override; /// Returns the layout tracker that owns this pipeline - PipelineLayout& GetOwner() const { + PipelineOwner& GetOwner() const { return owner; } @@ -98,7 +105,8 @@ public: private: Instance& instance; - PipelineLayout& owner; + CommandScheduler& scheduler; + PipelineOwner& owner; vk::Pipeline pipeline; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp deleted file mode 100644 index 76be3845e..000000000 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ /dev/null @@ -1,1629 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include "common/alignment.h" -#include "common/math_util.h" -#include "common/microprofile.h" -#include "common/vector_math.h" -#include "core/hw/gpu.h" -#include "video_core/pica_state.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_rasterizer.h" -#include "video_core/regs_texturing.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_surface_params.h" -#include "video_core/renderer_vulkan/pica_to_vulkan.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" - -namespace Vulkan { - -MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128)); -MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(Vulkan_Blits, "Vulkan", "Blits", MP_RGB(100, 100, 255)); -MICROPROFILE_DEFINE(Vulkan_CacheManagement, "Vulkan", "Cache Management", MP_RGB(100, 255, 
100)); - -using PixelFormat = SurfaceParams::PixelFormat; -using SurfaceType = SurfaceParams::SurfaceType; - -// They shall be big enough for about one frame. -constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; -constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; -constexpr u32 TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; - -constexpr std::array LUT_LF_VIEWS = { - vk::Format::eR32G32Sfloat -}; - -constexpr std::array LUT_VIEWS = { - vk::Format::eR32G32Sfloat, - vk::Format::eR32G32B32A32Sfloat -}; - -RasterizerVulkan::RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window) : - scheduler(scheduler), vertex_buffer(scheduler, VERTEX_BUFFER_SIZE, BufferUsage::Vertex), - index_buffer(scheduler, INDEX_BUFFER_SIZE, BufferUsage::Index), - uniform_buffer(scheduler, UNIFORM_BUFFER_SIZE, BufferUsage::Uniform), - texture_buffer_lut_lf(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_LF_VIEWS), - texture_buffer_lut(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_VIEWS) { - - // Implement shadow - allow_shadow = false; - - // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 - //state.clip_distance[0] = true; - - // Setup uniform data - uniform_block_data.dirty = true; - uniform_block_data.lighting_lut_dirty.fill(true); - uniform_block_data.lighting_lut_dirty_any = true; - uniform_block_data.fog_lut_dirty = true; - uniform_block_data.proctex_noise_lut_dirty = true; - uniform_block_data.proctex_color_map_dirty = true; - uniform_block_data.proctex_alpha_map_dirty = true; - uniform_block_data.proctex_lut_dirty = true; - uniform_block_data.proctex_diff_lut_dirty = true; - - // Query uniform buffer alignment requirements - uniform_buffer_alignment = g_vk_instace->UniformMinAlignment(); - uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), - uniform_buffer_alignment); - uniform_size_aligned_fs = Common::AlignUp(sizeof(UniformData), - 
uniform_buffer_alignment); - auto& state = VulkanState::Get(); - state.SetUniformBuffer(0, 0, uniform_size_aligned_vs, uniform_buffer); - state.SetUniformBuffer(1, uniform_size_aligned_vs, uniform_size_aligned_fs, uniform_buffer); - - // Bind texel buffers - state.SetTexelBuffer(0, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut_lf, 0); - state.SetTexelBuffer(1, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 0); - state.SetTexelBuffer(2, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 1); - - // Set clear texture color - state.SetPlaceholderColor(255, 255, 255, 255); - - SyncEntireState(); -} - -RasterizerVulkan::~RasterizerVulkan() = default; - -void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - -} - -void RasterizerVulkan::SyncEntireState() { - // Sync fixed function Vulkan state - SyncClipEnabled(); - SyncCullMode(); - SyncBlendEnabled(); - SyncBlendFuncs(); - SyncBlendColor(); - SyncLogicOp(); - SyncStencilTest(); - SyncDepthTest(); - SyncColorWriteMask(); - SyncStencilWriteMask(); - SyncDepthWriteMask(); - - // Sync uniforms - SyncClipCoef(); - SyncDepthScale(); - SyncDepthOffset(); - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages(); - for (std::size_t index = 0; index < tev_stages.size(); ++index) - SyncTevConstColor(index, tev_stages[index]); - - SyncGlobalAmbient(); - for (unsigned light_index = 0; light_index < 8; light_index++) { - SyncLightSpecular0(light_index); - SyncLightSpecular1(light_index); - SyncLightDiffuse(light_index); - SyncLightAmbient(light_index); - SyncLightPosition(light_index); - SyncLightDistanceAttenuationBias(light_index); - SyncLightDistanceAttenuationScale(light_index); - } - - SyncFogColor(); - SyncProcTexNoise(); - SyncProcTexBias(); - SyncShadowBias(); - SyncShadowTextureBias(); -} - -/** - * This is a helper function to resolve an issue when interpolating opposite quaternions. 
See below - * for a detailed description of this issue (yuriks): - * - * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you - * interpolate two quaternions that are opposite, instead of going from one rotation to another - * using the shortest path, you'll go around the longest path. You can test if two quaternions are - * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore - * making Dot(Q1, -Q2) positive. - * - * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is - * correct for most cases but can still rotate around the long way sometimes. An implementation - * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check - * between each step would work for those cases at the cost of being more complex to implement. - * - * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around - * these issues, making this basic implementation actually more accurate to the hardware. 
- */ -static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { - Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; - Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; - - return (Common::Dot(a, b) < 0.f); -} - -void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0, false); - vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); - vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); -} - -static constexpr std::array vs_attrib_types{ - GL_BYTE, // VertexAttributeFormat::BYTE - GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE - GL_SHORT, // VertexAttributeFormat::SHORT - GL_FLOAT // VertexAttributeFormat::FLOAT -}; - -struct VertexArrayInfo { - u32 vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; -}; - -static GLenum GetCurrentPrimitiveMode() { - const auto& regs = Pica::g_state.regs; - switch (regs.pipeline.triangle_topology) { - case Pica::PipelineRegs::TriangleTopology::Shader: - case Pica::PipelineRegs::TriangleTopology::List: - return GL_TRIANGLES; - case Pica::PipelineRegs::TriangleTopology::Fan: - return GL_TRIANGLE_FAN; - case Pica::PipelineRegs::TriangleTopology::Strip: - return GL_TRIANGLE_STRIP; - default: - UNREACHABLE(); - } -} - -void RasterizerVulkan::DrawTriangles() { - if (vertex_batch.empty()) - return; - Draw(false, false); -} - -bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - const auto& regs = Pica::g_state.regs; - auto& state = VulkanState::Get(); - - bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == - Pica::FramebufferRegs::FragmentOperationMode::Shadow; - - const bool has_stencil = - regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - - const 
bool write_depth_fb = state.DepthTestEnabled() || (has_stencil && state.StencilTestEnabled()); - - const bool using_color_fb = - regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0; - - const bool using_depth_fb = - !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && - (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0); - - Common::Rectangle viewport_rect_unscaled{ - // These registers hold half-width and half-height, so must be multiplied by 2 - regs.rasterizer.viewport_corner.x, // left - regs.rasterizer.viewport_corner.y + // top - static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * - 2), - regs.rasterizer.viewport_corner.x + // right - static_cast(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * - 2), - regs.rasterizer.viewport_corner.y // bottom - }; - - Surface color_surface, depth_surface; - Common::Rectangle surfaces_rect; - std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); - - const u16 res_scale = color_surface != nullptr - ? color_surface->res_scale - : (depth_surface == nullptr ? 
1u : depth_surface->res_scale); - - Common::Rectangle draw_rect{ - static_cast(std::clamp(static_cast(surfaces_rect.left) + - viewport_rect_unscaled.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast(std::clamp(static_cast(surfaces_rect.bottom) + - viewport_rect_unscaled.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast(std::clamp(static_cast(surfaces_rect.left) + - viewport_rect_unscaled.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast(std::clamp(static_cast(surfaces_rect.bottom) + - viewport_rect_unscaled.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom - - // Sync the viewport - vk::Viewport viewport{0, 0, static_cast(viewport_rect_unscaled.GetWidth() * res_scale), - static_cast(viewport_rect_unscaled.GetHeight() * res_scale), - 0.f, 1.f}; - state.SetViewport(viewport); - - if (uniform_block_data.data.framebuffer_scale != res_scale) { - uniform_block_data.data.framebuffer_scale = res_scale; - uniform_block_data.dirty = true; - } - - // Scissor checks are window-, not viewport-relative, which means that if the cached texture - // sub-rect changes, the scissor bounds also need to be updated. - int scissor_x1 = - static_cast(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale); - int scissor_y1 = - static_cast(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale); - // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when - // scaling or doing multisampling. 
- int scissor_x2 = - static_cast(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale); - int scissor_y2 = static_cast(surfaces_rect.bottom + - (regs.rasterizer.scissor_test.y2 + 1) * res_scale); - - if (uniform_block_data.data.scissor_x1 != scissor_x1 || - uniform_block_data.data.scissor_x2 != scissor_x2 || - uniform_block_data.data.scissor_y1 != scissor_y1 || - uniform_block_data.data.scissor_y2 != scissor_y2) { - - uniform_block_data.data.scissor_x1 = scissor_x1; - uniform_block_data.data.scissor_x2 = scissor_x2; - uniform_block_data.data.scissor_y1 = scissor_y1; - uniform_block_data.data.scissor_y2 = scissor_y2; - uniform_block_data.dirty = true; - } - - // Sync and bind the texture surfaces - Texture temp_tex; - const auto pica_textures = regs.texturing.GetTextures(); - for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { - const auto& texture = pica_textures[texture_index]; - - if (texture.enabled) { - //texture_samplers[texture_index].SyncWithConfig(texture.config); - Surface surface = res_cache.GetTextureSurface(texture); - if (surface != nullptr) { - if (color_surface && color_surface->texture.GetHandle() == - surface->texture.GetHandle()) { - // The game is trying to use a surface as a texture and framebuffer at the same time - // which causes unpredictable behavior on the host. - // Making a copy to sample from eliminates this issue and seems to be fairly cheap. - temp_tex.Create(color_surface->texture); - state.SetTexture(texture_index, temp_tex); - } else { - state.SetTexture(texture_index, surface->texture); - } - } else { - // Can occur when texture addr is null or its memory is unmapped/invalid - // HACK: In this case, the correct behaviour for the PICA is to use the last - // rendered colour. But because this would be impractical to implement, the - // next best alternative is to use a clear texture, essentially skipping - // the geometry in question. 
- // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn - // on the male character's face, which in the OpenGL default appear black. - state.UnbindTexture(texture_index); - } - } else { - state.UnbindTexture(texture_index); - } - } - - // Sync the LUTs within the texture buffer - SyncAndUploadLUTs(); - SyncAndUploadLUTsLF(); - - // Sync the uniform data - UploadUniforms(accelerate); - - // Viewport can have negative offsets or larger - // dimensions than our framebuffer sub-rect. - // Enable scissor test to prevent drawing - // outside of the framebuffer region - vk::Rect2D scissor{vk::Offset2D(draw_rect.left, draw_rect.bottom), - vk::Extent2D(draw_rect.GetHeight(), draw_rect.GetHeight())}; - state.SetScissor(scissor); - - // Bind the framebuffer surfaces - state.BeginRendering(color_surface != nullptr ? &color_surface->texture : nullptr, - depth_surface != nullptr ? &depth_surface->texture : nullptr, true); - state.ApplyRenderState(Pica::g_state.regs); - state.SetVertexBuffer(vertex_buffer, 0); - - ASSERT(vertex_batch.size() <= VERTEX_BUFFER_SIZE); - - std::size_t vertices = vertex_batch.size(); - auto data = std::as_bytes(std::span(vertex_batch.data(), vertex_batch.size())); - vertex_buffer.Upload(data, 0); - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.draw(vertices, 1, 0, 0); - - vertex_batch.clear(); - - // Mark framebuffer surfaces as dirty - Common::Rectangle draw_rect_unscaled{draw_rect.left / res_scale, draw_rect.top / res_scale, - draw_rect.right / res_scale, - draw_rect.bottom / res_scale}; - - if (color_surface != nullptr) { - auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - color_surface); - } - if (depth_surface != nullptr && write_depth_fb) { - auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), 
boost::icl::length(interval), - depth_surface); - } - - state.EndRendering(); - - if (color_surface) { - color_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - } - - if (depth_surface) { - depth_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - } - - return true; -} - -void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) { - const auto& regs = Pica::g_state.regs; - - switch (id) { - // Culling - case PICA_REG_INDEX(rasterizer.cull_mode): - SyncCullMode(); - break; - - // Clipping plane - case PICA_REG_INDEX(rasterizer.clip_enable): - SyncClipEnabled(); - break; - - case PICA_REG_INDEX(rasterizer.clip_coef[0]): - case PICA_REG_INDEX(rasterizer.clip_coef[1]): - case PICA_REG_INDEX(rasterizer.clip_coef[2]): - case PICA_REG_INDEX(rasterizer.clip_coef[3]): - SyncClipCoef(); - break; - - // Depth modifiers - case PICA_REG_INDEX(rasterizer.viewport_depth_range): - SyncDepthScale(); - break; - case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane): - SyncDepthOffset(); - break; - - // Depth buffering - case PICA_REG_INDEX(rasterizer.depthmap_enable): - shader_dirty = true; - break; - - // Blending - case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): - //if (GLES) { - // With GLES, we need this in the fragment shader to emulate logic operations - // shader_dirty = true; - //} - SyncBlendEnabled(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): - SyncBlendFuncs(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.blend_const): - SyncBlendColor(); - break; - - // Shadow texture - case PICA_REG_INDEX(texturing.shadow): - SyncShadowTextureBias(); - break; - - // Fog state - case PICA_REG_INDEX(texturing.fog_color): - SyncFogColor(); - break; - case PICA_REG_INDEX(texturing.fog_lut_data[0]): - case PICA_REG_INDEX(texturing.fog_lut_data[1]): - case PICA_REG_INDEX(texturing.fog_lut_data[2]): - case PICA_REG_INDEX(texturing.fog_lut_data[3]): - case 
PICA_REG_INDEX(texturing.fog_lut_data[4]): - case PICA_REG_INDEX(texturing.fog_lut_data[5]): - case PICA_REG_INDEX(texturing.fog_lut_data[6]): - case PICA_REG_INDEX(texturing.fog_lut_data[7]): - uniform_block_data.fog_lut_dirty = true; - break; - - // ProcTex state - case PICA_REG_INDEX(texturing.proctex): - case PICA_REG_INDEX(texturing.proctex_lut): - case PICA_REG_INDEX(texturing.proctex_lut_offset): - SyncProcTexBias(); - shader_dirty = true; - break; - - case PICA_REG_INDEX(texturing.proctex_noise_u): - case PICA_REG_INDEX(texturing.proctex_noise_v): - case PICA_REG_INDEX(texturing.proctex_noise_frequency): - SyncProcTexNoise(); - break; - - case PICA_REG_INDEX(texturing.proctex_lut_data[0]): - case PICA_REG_INDEX(texturing.proctex_lut_data[1]): - case PICA_REG_INDEX(texturing.proctex_lut_data[2]): - case PICA_REG_INDEX(texturing.proctex_lut_data[3]): - case PICA_REG_INDEX(texturing.proctex_lut_data[4]): - case PICA_REG_INDEX(texturing.proctex_lut_data[5]): - case PICA_REG_INDEX(texturing.proctex_lut_data[6]): - case PICA_REG_INDEX(texturing.proctex_lut_data[7]): - using Pica::TexturingRegs; - switch (regs.texturing.proctex_lut_config.ref_table.Value()) { - case TexturingRegs::ProcTexLutTable::Noise: - uniform_block_data.proctex_noise_lut_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::ColorMap: - uniform_block_data.proctex_color_map_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::AlphaMap: - uniform_block_data.proctex_alpha_map_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::Color: - uniform_block_data.proctex_lut_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::ColorDiff: - uniform_block_data.proctex_diff_lut_dirty = true; - break; - } - break; - - // Alpha test - case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): - SyncAlphaTest(); - shader_dirty = true; - break; - - // Sync GL stencil test + stencil write mask - // (Pica stencil test function register also contains a stencil write mask) - case 
PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): - SyncStencilTest(); - SyncStencilWriteMask(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): - case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): - SyncStencilTest(); - break; - - // Sync GL depth test + depth and color write mask - // (Pica depth test function register also contains a depth and color write mask) - case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): - SyncDepthTest(); - SyncDepthWriteMask(); - SyncColorWriteMask(); - break; - - // Sync GL depth and stencil write mask - // (This is a dedicated combined depth / stencil write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): - SyncDepthWriteMask(); - SyncStencilWriteMask(); - break; - - // Sync GL color write mask - // (This is a dedicated color write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): - SyncColorWriteMask(); - break; - - case PICA_REG_INDEX(framebuffer.shadow): - SyncShadowBias(); - break; - - // Scissor test - case PICA_REG_INDEX(rasterizer.scissor_test.mode): - shader_dirty = true; - break; - - // Logic op - case PICA_REG_INDEX(framebuffer.output_merger.logic_op): - //if (GLES) { - // With GLES, we need this in the fragment shader to emulate logic operations - // shader_dirty = true; - //} - SyncLogicOp(); - break; - - case PICA_REG_INDEX(texturing.main_config): - shader_dirty = true; - break; - - // Texture 0 type - case PICA_REG_INDEX(texturing.texture0.type): - shader_dirty = true; - break; - - // TEV stages - // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) - case PICA_REG_INDEX(texturing.tev_stage0.color_source1): - case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage0.color_op): - case PICA_REG_INDEX(texturing.tev_stage0.color_scale): - case PICA_REG_INDEX(texturing.tev_stage1.color_source1): - case 
PICA_REG_INDEX(texturing.tev_stage1.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage1.color_op): - case PICA_REG_INDEX(texturing.tev_stage1.color_scale): - case PICA_REG_INDEX(texturing.tev_stage2.color_source1): - case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage2.color_op): - case PICA_REG_INDEX(texturing.tev_stage2.color_scale): - case PICA_REG_INDEX(texturing.tev_stage3.color_source1): - case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage3.color_op): - case PICA_REG_INDEX(texturing.tev_stage3.color_scale): - case PICA_REG_INDEX(texturing.tev_stage4.color_source1): - case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage4.color_op): - case PICA_REG_INDEX(texturing.tev_stage4.color_scale): - case PICA_REG_INDEX(texturing.tev_stage5.color_source1): - case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage5.color_op): - case PICA_REG_INDEX(texturing.tev_stage5.color_scale): - case PICA_REG_INDEX(texturing.tev_combiner_buffer_input): - shader_dirty = true; - break; - case PICA_REG_INDEX(texturing.tev_stage0.const_r): - SyncTevConstColor(0, regs.texturing.tev_stage0); - break; - case PICA_REG_INDEX(texturing.tev_stage1.const_r): - SyncTevConstColor(1, regs.texturing.tev_stage1); - break; - case PICA_REG_INDEX(texturing.tev_stage2.const_r): - SyncTevConstColor(2, regs.texturing.tev_stage2); - break; - case PICA_REG_INDEX(texturing.tev_stage3.const_r): - SyncTevConstColor(3, regs.texturing.tev_stage3); - break; - case PICA_REG_INDEX(texturing.tev_stage4.const_r): - SyncTevConstColor(4, regs.texturing.tev_stage4); - break; - case PICA_REG_INDEX(texturing.tev_stage5.const_r): - SyncTevConstColor(5, regs.texturing.tev_stage5); - break; - - // TEV combiner buffer color - case PICA_REG_INDEX(texturing.tev_combiner_buffer_color): - SyncCombinerColor(); - break; - - // Fragment 
lighting switches - case PICA_REG_INDEX(lighting.disable): - case PICA_REG_INDEX(lighting.max_light_index): - case PICA_REG_INDEX(lighting.config0): - case PICA_REG_INDEX(lighting.config1): - case PICA_REG_INDEX(lighting.abs_lut_input): - case PICA_REG_INDEX(lighting.lut_input): - case PICA_REG_INDEX(lighting.lut_scale): - case PICA_REG_INDEX(lighting.light_enable): - break; - - // Fragment lighting specular 0 color - case PICA_REG_INDEX(lighting.light[0].specular_0): - SyncLightSpecular0(0); - break; - case PICA_REG_INDEX(lighting.light[1].specular_0): - SyncLightSpecular0(1); - break; - case PICA_REG_INDEX(lighting.light[2].specular_0): - SyncLightSpecular0(2); - break; - case PICA_REG_INDEX(lighting.light[3].specular_0): - SyncLightSpecular0(3); - break; - case PICA_REG_INDEX(lighting.light[4].specular_0): - SyncLightSpecular0(4); - break; - case PICA_REG_INDEX(lighting.light[5].specular_0): - SyncLightSpecular0(5); - break; - case PICA_REG_INDEX(lighting.light[6].specular_0): - SyncLightSpecular0(6); - break; - case PICA_REG_INDEX(lighting.light[7].specular_0): - SyncLightSpecular0(7); - break; - - // Fragment lighting specular 1 color - case PICA_REG_INDEX(lighting.light[0].specular_1): - SyncLightSpecular1(0); - break; - case PICA_REG_INDEX(lighting.light[1].specular_1): - SyncLightSpecular1(1); - break; - case PICA_REG_INDEX(lighting.light[2].specular_1): - SyncLightSpecular1(2); - break; - case PICA_REG_INDEX(lighting.light[3].specular_1): - SyncLightSpecular1(3); - break; - case PICA_REG_INDEX(lighting.light[4].specular_1): - SyncLightSpecular1(4); - break; - case PICA_REG_INDEX(lighting.light[5].specular_1): - SyncLightSpecular1(5); - break; - case PICA_REG_INDEX(lighting.light[6].specular_1): - SyncLightSpecular1(6); - break; - case PICA_REG_INDEX(lighting.light[7].specular_1): - SyncLightSpecular1(7); - break; - - // Fragment lighting diffuse color - case PICA_REG_INDEX(lighting.light[0].diffuse): - SyncLightDiffuse(0); - break; - case 
PICA_REG_INDEX(lighting.light[1].diffuse): - SyncLightDiffuse(1); - break; - case PICA_REG_INDEX(lighting.light[2].diffuse): - SyncLightDiffuse(2); - break; - case PICA_REG_INDEX(lighting.light[3].diffuse): - SyncLightDiffuse(3); - break; - case PICA_REG_INDEX(lighting.light[4].diffuse): - SyncLightDiffuse(4); - break; - case PICA_REG_INDEX(lighting.light[5].diffuse): - SyncLightDiffuse(5); - break; - case PICA_REG_INDEX(lighting.light[6].diffuse): - SyncLightDiffuse(6); - break; - case PICA_REG_INDEX(lighting.light[7].diffuse): - SyncLightDiffuse(7); - break; - - // Fragment lighting ambient color - case PICA_REG_INDEX(lighting.light[0].ambient): - SyncLightAmbient(0); - break; - case PICA_REG_INDEX(lighting.light[1].ambient): - SyncLightAmbient(1); - break; - case PICA_REG_INDEX(lighting.light[2].ambient): - SyncLightAmbient(2); - break; - case PICA_REG_INDEX(lighting.light[3].ambient): - SyncLightAmbient(3); - break; - case PICA_REG_INDEX(lighting.light[4].ambient): - SyncLightAmbient(4); - break; - case PICA_REG_INDEX(lighting.light[5].ambient): - SyncLightAmbient(5); - break; - case PICA_REG_INDEX(lighting.light[6].ambient): - SyncLightAmbient(6); - break; - case PICA_REG_INDEX(lighting.light[7].ambient): - SyncLightAmbient(7); - break; - - // Fragment lighting position - case PICA_REG_INDEX(lighting.light[0].x): - case PICA_REG_INDEX(lighting.light[0].z): - SyncLightPosition(0); - break; - case PICA_REG_INDEX(lighting.light[1].x): - case PICA_REG_INDEX(lighting.light[1].z): - SyncLightPosition(1); - break; - case PICA_REG_INDEX(lighting.light[2].x): - case PICA_REG_INDEX(lighting.light[2].z): - SyncLightPosition(2); - break; - case PICA_REG_INDEX(lighting.light[3].x): - case PICA_REG_INDEX(lighting.light[3].z): - SyncLightPosition(3); - break; - case PICA_REG_INDEX(lighting.light[4].x): - case PICA_REG_INDEX(lighting.light[4].z): - SyncLightPosition(4); - break; - case PICA_REG_INDEX(lighting.light[5].x): - case PICA_REG_INDEX(lighting.light[5].z): - 
SyncLightPosition(5); - break; - case PICA_REG_INDEX(lighting.light[6].x): - case PICA_REG_INDEX(lighting.light[6].z): - SyncLightPosition(6); - break; - case PICA_REG_INDEX(lighting.light[7].x): - case PICA_REG_INDEX(lighting.light[7].z): - SyncLightPosition(7); - break; - - // Fragment spot lighting direction - case PICA_REG_INDEX(lighting.light[0].spot_x): - case PICA_REG_INDEX(lighting.light[0].spot_z): - SyncLightSpotDirection(0); - break; - case PICA_REG_INDEX(lighting.light[1].spot_x): - case PICA_REG_INDEX(lighting.light[1].spot_z): - SyncLightSpotDirection(1); - break; - case PICA_REG_INDEX(lighting.light[2].spot_x): - case PICA_REG_INDEX(lighting.light[2].spot_z): - SyncLightSpotDirection(2); - break; - case PICA_REG_INDEX(lighting.light[3].spot_x): - case PICA_REG_INDEX(lighting.light[3].spot_z): - SyncLightSpotDirection(3); - break; - case PICA_REG_INDEX(lighting.light[4].spot_x): - case PICA_REG_INDEX(lighting.light[4].spot_z): - SyncLightSpotDirection(4); - break; - case PICA_REG_INDEX(lighting.light[5].spot_x): - case PICA_REG_INDEX(lighting.light[5].spot_z): - SyncLightSpotDirection(5); - break; - case PICA_REG_INDEX(lighting.light[6].spot_x): - case PICA_REG_INDEX(lighting.light[6].spot_z): - SyncLightSpotDirection(6); - break; - case PICA_REG_INDEX(lighting.light[7].spot_x): - case PICA_REG_INDEX(lighting.light[7].spot_z): - SyncLightSpotDirection(7); - break; - - // Fragment lighting light source config - case PICA_REG_INDEX(lighting.light[0].config): - case PICA_REG_INDEX(lighting.light[1].config): - case PICA_REG_INDEX(lighting.light[2].config): - case PICA_REG_INDEX(lighting.light[3].config): - case PICA_REG_INDEX(lighting.light[4].config): - case PICA_REG_INDEX(lighting.light[5].config): - case PICA_REG_INDEX(lighting.light[6].config): - case PICA_REG_INDEX(lighting.light[7].config): - shader_dirty = true; - break; - - // Fragment lighting distance attenuation bias - case PICA_REG_INDEX(lighting.light[0].dist_atten_bias): - 
SyncLightDistanceAttenuationBias(0); - break; - case PICA_REG_INDEX(lighting.light[1].dist_atten_bias): - SyncLightDistanceAttenuationBias(1); - break; - case PICA_REG_INDEX(lighting.light[2].dist_atten_bias): - SyncLightDistanceAttenuationBias(2); - break; - case PICA_REG_INDEX(lighting.light[3].dist_atten_bias): - SyncLightDistanceAttenuationBias(3); - break; - case PICA_REG_INDEX(lighting.light[4].dist_atten_bias): - SyncLightDistanceAttenuationBias(4); - break; - case PICA_REG_INDEX(lighting.light[5].dist_atten_bias): - SyncLightDistanceAttenuationBias(5); - break; - case PICA_REG_INDEX(lighting.light[6].dist_atten_bias): - SyncLightDistanceAttenuationBias(6); - break; - case PICA_REG_INDEX(lighting.light[7].dist_atten_bias): - SyncLightDistanceAttenuationBias(7); - break; - - // Fragment lighting distance attenuation scale - case PICA_REG_INDEX(lighting.light[0].dist_atten_scale): - SyncLightDistanceAttenuationScale(0); - break; - case PICA_REG_INDEX(lighting.light[1].dist_atten_scale): - SyncLightDistanceAttenuationScale(1); - break; - case PICA_REG_INDEX(lighting.light[2].dist_atten_scale): - SyncLightDistanceAttenuationScale(2); - break; - case PICA_REG_INDEX(lighting.light[3].dist_atten_scale): - SyncLightDistanceAttenuationScale(3); - break; - case PICA_REG_INDEX(lighting.light[4].dist_atten_scale): - SyncLightDistanceAttenuationScale(4); - break; - case PICA_REG_INDEX(lighting.light[5].dist_atten_scale): - SyncLightDistanceAttenuationScale(5); - break; - case PICA_REG_INDEX(lighting.light[6].dist_atten_scale): - SyncLightDistanceAttenuationScale(6); - break; - case PICA_REG_INDEX(lighting.light[7].dist_atten_scale): - SyncLightDistanceAttenuationScale(7); - break; - - // Fragment lighting global ambient color (emission + ambient * ambient) - case PICA_REG_INDEX(lighting.global_ambient): - SyncGlobalAmbient(); - break; - - // Fragment lighting lookup tables - case PICA_REG_INDEX(lighting.lut_data[0]): - case PICA_REG_INDEX(lighting.lut_data[1]): - case 
PICA_REG_INDEX(lighting.lut_data[2]): - case PICA_REG_INDEX(lighting.lut_data[3]): - case PICA_REG_INDEX(lighting.lut_data[4]): - case PICA_REG_INDEX(lighting.lut_data[5]): - case PICA_REG_INDEX(lighting.lut_data[6]): - case PICA_REG_INDEX(lighting.lut_data[7]): { - const auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lighting_lut_dirty[lut_config.type] = true; - uniform_block_data.lighting_lut_dirty_any = true; - break; - } - } -} - -void RasterizerVulkan::FlushAll() { - MICROPROFILE_SCOPE(Vulkan_CacheManagement); - res_cache.FlushAll(); -} - -void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(Vulkan_CacheManagement); - res_cache.FlushRegion(addr, size); -} - -void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(Vulkan_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); -} - -void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { - MICROPROFILE_SCOPE(Vulkan_CacheManagement); - res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); -} - -void RasterizerVulkan::ClearAll(bool flush) { - res_cache.ClearAll(flush); -} - -bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { - MICROPROFILE_SCOPE(Vulkan_Blits); - - SurfaceParams src_params; - src_params.addr = config.GetPhysicalInputAddress(); - src_params.width = config.output_width; - src_params.stride = config.input_width; - src_params.height = config.output_height; - src_params.is_tiled = !config.input_linear; - src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format); - src_params.UpdateParams(); - - SurfaceParams dst_params; - dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 - : config.output_width.Value(); - dst_params.height = config.scaling == config.ScaleXY ? 
config.output_height.Value() / 2 - : config.output_height.Value(); - dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format); - dst_params.UpdateParams(); - - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - if (src_surface == nullptr) - return false; - - dst_params.res_scale = src_surface->res_scale; - - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); - if (dst_surface == nullptr) - return false; - - if (src_surface->is_tiled != dst_surface->is_tiled) - std::swap(src_rect.top, src_rect.bottom); - - if (config.flip_vertically) - std::swap(src_rect.top, src_rect.bottom); - - if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) - return false; - - res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); - return true; -} - -bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { - u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); - if (copy_size == 0) { - return false; - } - - u32 input_gap = config.texture_copy.input_gap * 16; - u32 input_width = config.texture_copy.input_width * 16; - if (input_width == 0 && input_gap != 0) { - return false; - } - if (input_gap == 0 || input_width >= copy_size) { - input_width = copy_size; - input_gap = 0; - } - if (copy_size % input_width != 0) { - return false; - } - - u32 output_gap = config.texture_copy.output_gap * 16; - u32 output_width = config.texture_copy.output_width * 16; - if (output_width == 0 && output_gap != 0) { - return false; - } - if (output_gap == 0 || output_width >= copy_size) { - output_width = copy_size; - output_gap = 0; - } - if (copy_size % output_width != 0) { - return false; - } - - 
SurfaceParams src_params; - src_params.addr = config.GetPhysicalInputAddress(); - src_params.stride = input_width + input_gap; // stride in bytes - src_params.width = input_width; // width in bytes - src_params.height = copy_size / input_width; - src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; - src_params.end = src_params.addr + src_params.size; - - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); - if (src_surface == nullptr) { - return false; - } - - if (output_gap != 0 && - (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) * - (src_surface->is_tiled ? 8 : 1) || - output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) { - return false; - } - - SurfaceParams dst_params = *src_surface; - dst_params.addr = config.GetPhysicalOutputAddress(); - dst_params.width = src_rect.GetWidth() / src_surface->res_scale; - dst_params.stride = dst_params.width + src_surface->PixelsInBytes( - src_surface->is_tiled ? 
output_gap / 8 : output_gap); - dst_params.height = src_rect.GetHeight() / src_surface->res_scale; - dst_params.res_scale = src_surface->res_scale; - dst_params.UpdateParams(); - - // Since we are going to invalidate the gap if there is one, we will have to load it first - const bool load_gap = output_gap != 0; - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); - if (dst_surface == nullptr) { - return false; - } - - if (dst_surface->type == SurfaceType::Texture) { - return false; - } - - if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) { - return false; - } - - res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface); - return true; -} - -bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - Surface dst_surface = res_cache.GetFillSurface(config); - if (dst_surface == nullptr) - return false; - - res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface); - return true; -} - -bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, - PAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) { - if (framebuffer_addr == 0) { - return false; - } - MICROPROFILE_SCOPE(Vulkan_CacheManagement); - - SurfaceParams src_params; - src_params.addr = framebuffer_addr; - src_params.width = std::min(config.width.Value(), pixel_stride); - src_params.height = config.height; - src_params.stride = pixel_stride; - src_params.is_tiled = false; - src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format); - src_params.UpdateParams(); - - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { - return false; - } - - u32 scaled_width = src_surface->GetScaledWidth(); - u32 scaled_height 
= src_surface->GetScaledHeight(); - - screen_info.display_texcoords = Common::Rectangle( - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - - screen_info.display_texture = &src_surface->texture; - return true; -} - -void RasterizerVulkan::SyncClipEnabled() { - //state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; -} - -void RasterizerVulkan::SyncClipCoef() { - const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); - const glm::vec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), - raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; - if (new_clip_coef != uniform_block_data.data.clip_coef) { - uniform_block_data.data.clip_coef = new_clip_coef; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncCullMode() { - const auto& regs = Pica::g_state.regs; - - auto& state = VulkanState::Get(); - state.SetCullMode(vk::CullModeFlagBits::eNone); - return; - - switch (regs.rasterizer.cull_mode) { - case Pica::RasterizerRegs::CullMode::KeepAll: - state.SetCullMode(vk::CullModeFlagBits::eNone); - break; - - case Pica::RasterizerRegs::CullMode::KeepClockWise: - state.SetCullMode(vk::CullModeFlagBits::eFront); - state.SetFrontFace(vk::FrontFace::eClockwise); - break; - - case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: - state.SetCullMode(vk::CullModeFlagBits::eFront); - state.SetFrontFace(vk::FrontFace::eCounterClockwise); - break; - - default: - LOG_CRITICAL(Render_Vulkan, "Unknown cull mode {}", - static_cast(regs.rasterizer.cull_mode.Value())); - UNIMPLEMENTED(); - break; - } -} - -void RasterizerVulkan::SyncDepthScale() { - float depth_scale = - Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32(); - if (depth_scale != uniform_block_data.data.depth_scale) { - uniform_block_data.data.depth_scale = depth_scale; - 
uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncDepthOffset() { - float depth_offset = - Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32(); - if (depth_offset != uniform_block_data.data.depth_offset) { - uniform_block_data.data.depth_offset = depth_offset; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncBlendEnabled() { - auto& state = VulkanState::Get(); - state.SetBlendEnable(Pica::g_state.regs.framebuffer.output_merger.alphablend_enable); -} - -void RasterizerVulkan::SyncBlendFuncs() { - const auto& regs = Pica::g_state.regs; - auto rgb_op = PicaToVK::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); - auto alpha_op = PicaToVK::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); - auto src_color = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); - auto dst_color = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); - auto src_alpha = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); - auto dst_alpha = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); - - auto& state = VulkanState::Get(); - state.SetBlendOp(rgb_op, alpha_op, src_color, dst_color, src_alpha, dst_alpha); -} - -void RasterizerVulkan::SyncBlendColor() { - auto color = PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); - - auto& state = VulkanState::Get(); - state.SetBlendCostants(color.r, color.g, color.b, color.a); -} - -void RasterizerVulkan::SyncFogColor() { - const auto& regs = Pica::g_state.regs; - uniform_block_data.data.fog_color = { - regs.texturing.fog_color.r.Value() / 255.0f, - regs.texturing.fog_color.g.Value() / 255.0f, - regs.texturing.fog_color.b.Value() / 255.0f, - }; - uniform_block_data.dirty = true; -} - -void RasterizerVulkan::SyncProcTexNoise() { - const auto& regs = 
Pica::g_state.regs.texturing; - uniform_block_data.data.proctex_noise_f = { - Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), - Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(), - }; - uniform_block_data.data.proctex_noise_a = { - regs.proctex_noise_u.amplitude / 4095.0f, - regs.proctex_noise_v.amplitude / 4095.0f, - }; - uniform_block_data.data.proctex_noise_p = { - Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(), - Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(), - }; - - uniform_block_data.dirty = true; -} - -void RasterizerVulkan::SyncProcTexBias() { - const auto& regs = Pica::g_state.regs.texturing; - uniform_block_data.data.proctex_bias = - Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8)) - .ToFloat32(); - - uniform_block_data.dirty = true; -} - -void RasterizerVulkan::SyncAlphaTest() { - const auto& regs = Pica::g_state.regs; - if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { - uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLogicOp() { - const auto& regs = Pica::g_state.regs; - - auto& state = VulkanState::Get(); - state.SetLogicOp(PicaToVK::LogicOp(regs.framebuffer.output_merger.logic_op)); -} - -void RasterizerVulkan::SyncColorWriteMask() { - const auto& regs = Pica::g_state.regs; - - auto WriteEnabled = [&](u32 value) { - return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0; - }; - - vk::ColorComponentFlags mask; - if (WriteEnabled(regs.framebuffer.output_merger.red_enable)) - mask |= vk::ColorComponentFlagBits::eR; - if (WriteEnabled(regs.framebuffer.output_merger.green_enable)) - mask |= vk::ColorComponentFlagBits::eG; - if (WriteEnabled(regs.framebuffer.output_merger.blue_enable)) - mask |= vk::ColorComponentFlagBits::eB; - if 
(WriteEnabled(regs.framebuffer.output_merger.alpha_enable)) - mask |= vk::ColorComponentFlagBits::eA; - - auto& state = VulkanState::Get(); - state.SetColorMask(mask); -} - -void RasterizerVulkan::SyncStencilWriteMask() { - const auto& regs = Pica::g_state.regs; - - auto& state = VulkanState::Get(); - state.SetStencilWrite((regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) - ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0); -} - -void RasterizerVulkan::SyncDepthWriteMask() { - const auto& regs = Pica::g_state.regs; - - auto& state = VulkanState::Get(); - state.SetDepthWrite(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && - regs.framebuffer.output_merger.depth_write_enable); -} - -void RasterizerVulkan::SyncStencilTest() { - const auto& regs = Pica::g_state.regs; - - bool enabled = regs.framebuffer.output_merger.stencil_test.enable && - regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - auto func = PicaToVK::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); - auto ref = regs.framebuffer.output_merger.stencil_test.reference_value; - auto mask = regs.framebuffer.output_merger.stencil_test.input_mask; - auto stencil_fail = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); - auto depth_fail = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); - auto depth_pass = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); - - auto& state = VulkanState::Get(); - state.SetStencilTest(enabled, stencil_fail, depth_pass, depth_fail, func, ref); - state.SetStencilInput(mask); -} - -void RasterizerVulkan::SyncDepthTest() { - const auto& regs = Pica::g_state.regs; - bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || - regs.framebuffer.output_merger.depth_write_enable == 1; - auto test_func = regs.framebuffer.output_merger.depth_test_enable == 1 - ? 
PicaToVK::CompareFunc(regs.framebuffer.output_merger.depth_test_func) - : vk::CompareOp::eAlways; - - auto& state = VulkanState::Get(); - state.SetDepthTest(test_enabled, test_func); -} - -void RasterizerVulkan::SyncCombinerColor() { - auto combiner_color = - PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); - if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { - uniform_block_data.data.tev_combiner_buffer_color = combiner_color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index, - const Pica::TexturingRegs::TevStageConfig& tev_stage) { - const auto const_color = PicaToVK::ColorRGBA8(tev_stage.const_color); - - if (const_color == uniform_block_data.data.const_color[stage_index]) { - return; - } - - uniform_block_data.data.const_color[stage_index] = const_color; - uniform_block_data.dirty = true; -} - -void RasterizerVulkan::SyncGlobalAmbient() { - auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient); - if (color != uniform_block_data.data.lighting_global_ambient) { - uniform_block_data.data.lighting_global_ambient = color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightSpecular0(int light_index) { - auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); - if (color != uniform_block_data.data.light_src[light_index].specular_0) { - uniform_block_data.data.light_src[light_index].specular_0 = color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightSpecular1(int light_index) { - auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); - if (color != uniform_block_data.data.light_src[light_index].specular_1) { - uniform_block_data.data.light_src[light_index].specular_1 = color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightDiffuse(int light_index) { - auto color = 
PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); - if (color != uniform_block_data.data.light_src[light_index].diffuse) { - uniform_block_data.data.light_src[light_index].diffuse = color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightAmbient(int light_index) { - auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); - if (color != uniform_block_data.data.light_src[light_index].ambient) { - uniform_block_data.data.light_src[light_index].ambient = color; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightPosition(int light_index) { - glm::vec3 position = { - Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), - Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), - Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()}; - - if (position != uniform_block_data.data.light_src[light_index].position) { - uniform_block_data.data.light_src[light_index].position = position; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightSpotDirection(int light_index) { - const auto& light = Pica::g_state.regs.lighting.light[light_index]; - glm::vec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f, - light.spot_z / 2047.0f}; - - if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { - uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { - float dist_atten_bias = - Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) - .ToFloat32(); - - if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { - uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; - 
uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { - float dist_atten_scale = - Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) - .ToFloat32(); - - if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { - uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncShadowBias() { - const auto& shadow = Pica::g_state.regs.framebuffer.shadow; - float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); - float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); - - if (constant != uniform_block_data.data.shadow_bias_constant || - linear != uniform_block_data.data.shadow_bias_linear) { - uniform_block_data.data.shadow_bias_constant = constant; - uniform_block_data.data.shadow_bias_linear = linear; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncShadowTextureBias() { - int bias = Pica::g_state.regs.texturing.shadow.bias << 1; - if (bias != uniform_block_data.data.shadow_texture_bias) { - uniform_block_data.data.shadow_texture_bias = bias; - uniform_block_data.dirty = true; - } -} - -void RasterizerVulkan::SyncAndUploadLUTsLF() { - constexpr std::size_t max_size = - sizeof(glm::vec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(glm::vec2) * 128; // fog - - if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { - return; - } - - std::size_t bytes_used = 0; - u8* buffer = nullptr; u32 offset = 0; bool invalidate = false; - std::tie(buffer, offset, invalidate) = texture_buffer_lut_lf.Map(max_size, sizeof(glm::vec4)); - - // Sync the lighting luts - if (uniform_block_data.lighting_lut_dirty_any || invalidate) { - for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { - if (uniform_block_data.lighting_lut_dirty[index] || 
invalidate) { - std::array new_data; - const auto& source_lut = Pica::g_state.lighting.luts[index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), - [](const auto& entry) { - return glm::vec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lighting_lut_data[index] || invalidate) { - lighting_lut_data[index] = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(glm::vec2)); - uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = - static_cast((offset + bytes_used) / sizeof(glm::vec2)); - uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(glm::vec2); - } - uniform_block_data.lighting_lut_dirty[index] = false; - } - } - uniform_block_data.lighting_lut_dirty_any = false; - } - - // Sync the fog lut - if (uniform_block_data.fog_lut_dirty || invalidate) { - std::array new_data; - - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), - [](const auto& entry) { - return glm::vec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != fog_lut_data || invalidate) { - fog_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(glm::vec2)); - uniform_block_data.data.fog_lut_offset = - static_cast((offset + bytes_used) / sizeof(glm::vec2)); - uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(glm::vec2); - } - uniform_block_data.fog_lut_dirty = false; - } - - texture_buffer_lut_lf.Commit(bytes_used); -} - -void RasterizerVulkan::SyncAndUploadLUTs() { - constexpr std::size_t max_size = sizeof(glm::vec2) * 128 * 3 + // proctex: noise + color + alpha - sizeof(glm::vec4) * 256 + // proctex - sizeof(glm::vec4) * 256; // proctex diff - - if (!uniform_block_data.proctex_noise_lut_dirty && - !uniform_block_data.proctex_color_map_dirty && - !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && - 
!uniform_block_data.proctex_diff_lut_dirty) { - return; - } - - std::size_t bytes_used = 0; - u8* buffer = nullptr; u32 offset = 0; bool invalidate = false; - std::tie(buffer, offset, invalidate) = texture_buffer_lut.Map(max_size, sizeof(glm::vec4)); - - // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap - auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( - const std::array& lut, - std::array& lut_data, int& lut_offset) { - std::array new_data; - std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return glm::vec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lut_data || invalidate) { - lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(glm::vec2)); - lut_offset = static_cast((offset + bytes_used) / sizeof(glm::vec2)); - uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(glm::vec2); - } - }; - - // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - uniform_block_data.data.proctex_noise_lut_offset); - uniform_block_data.proctex_noise_lut_dirty = false; - } - - // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - uniform_block_data.data.proctex_color_map_offset); - uniform_block_data.proctex_color_map_dirty = false; - } - - // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - uniform_block_data.data.proctex_alpha_map_offset); - uniform_block_data.proctex_alpha_map_dirty = false; - } - - // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty || invalidate) { - std::array new_data; - - 
std::transform(Pica::g_state.proctex.color_table.begin(), - Pica::g_state.proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return glm::vec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_lut_data || invalidate) { - proctex_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(glm::vec4)); - uniform_block_data.data.proctex_lut_offset = - static_cast((offset + bytes_used) / sizeof(glm::vec4)); - uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(glm::vec4); - } - uniform_block_data.proctex_lut_dirty = false; - } - - // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { - std::array new_data; - - std::transform(Pica::g_state.proctex.color_diff_table.begin(), - Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return glm::vec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_diff_lut_data || invalidate) { - proctex_diff_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(glm::vec4)); - uniform_block_data.data.proctex_diff_lut_offset = - static_cast((offset + bytes_used) / sizeof(glm::vec4)); - uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(glm::vec4); - } - uniform_block_data.proctex_diff_lut_dirty = false; - } - - texture_buffer_lut.Commit(bytes_used); -} - -void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { - bool sync_vs = accelerate_draw; - bool sync_fs = uniform_block_data.dirty; - - if (!sync_vs && !sync_fs) - return; - - u32 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; - - std::size_t used_bytes = 0; - u8* uniforms = nullptr; u32 offset = 0; bool invalidate = false; - std::tie(uniforms, offset, invalidate) = uniform_buffer.Map(uniform_size, 
uniform_buffer_alignment); - - auto& state = VulkanState::Get(); - - // Reserved when acceleration is implemented - std::memset(uniforms + used_bytes, 0, sizeof(VSUniformData)); - used_bytes += uniform_size_aligned_vs; - - if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData)); - state.SetUniformBuffer(0, offset + used_bytes, sizeof(UniformData), uniform_buffer); - uniform_block_data.dirty = false; - used_bytes += uniform_size_aligned_fs; - } - - uniform_buffer.Commit(used_bytes); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h deleted file mode 100644 index e0ecc0cf9..000000000 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "common/common_types.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/regs_lighting.h" -#include "video_core/regs_texturing.h" -#include "video_core/shader/shader.h" -#include "video_core/renderer_vulkan/vk_state.h" -#include "video_core/renderer_vulkan/vk_rasterizer_cache.h" - -namespace Frontend { -class EmuWindow; -} - -namespace Vulkan { - -enum class UniformBindings : u32 { - Common = 0, - VertexShader = 1, - GeometryShader = 2 -}; - -struct LightSrc { - alignas(16) glm::vec3 specular_0; - alignas(16) glm::vec3 specular_1; - alignas(16) glm::vec3 diffuse; - alignas(16) glm::vec3 ambient; - alignas(16) glm::vec3 position; - alignas(16) glm::vec3 spot_direction; // negated - float dist_atten_bias; - float dist_atten_scale; -}; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. 
The GL spec is not clear wether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. -struct UniformData { - int framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - float shadow_bias_constant; - float shadow_bias_linear; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - int fog_lut_offset; - int proctex_noise_lut_offset; - int proctex_color_map_offset; - int proctex_alpha_map_offset; - int proctex_lut_offset; - int proctex_diff_lut_offset; - float proctex_bias; - int shadow_texture_bias; - alignas(16) glm::ivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; - alignas(16) glm::vec3 fog_color; - alignas(8) glm::vec2 proctex_noise_f; - alignas(8) glm::vec2 proctex_noise_a; - alignas(8) glm::vec2 proctex_noise_p; - alignas(16) glm::vec3 lighting_global_ambient; - LightSrc light_src[8]; - alignas(16) glm::vec4 const_color[6]; // A vec4 color for each of the six tev stages - alignas(16) glm::vec4 tev_combiner_buffer_color; - alignas(16) glm::vec4 clip_coef; -}; - -static_assert(sizeof(UniformData) == 0x4F0, - "The size of the UniformData structure has changed, update the structure in the shader"); -static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - -/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. -/// NOTE: the same rule from UniformData also applies here. 
-struct PicaUniformsData { - void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); - - struct BoolAligned { - alignas(16) int b; - }; - - std::array bools; - alignas(16) std::array i; - alignas(16) std::array f; -}; - -struct VSUniformData { - PicaUniformsData uniforms; -}; - -static_assert(sizeof(VSUniformData) == 1856, - "The size of the VSUniformData structure has changed, update the structure in the shader"); -static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - -struct ScreenInfo; -class CommandScheduler; - -class RasterizerVulkan : public VideoCore::RasterizerInterface { -public: - explicit RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window); - ~RasterizerVulkan() override; - - void LoadDiskResources(const std::atomic_bool& stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) override; - - void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; - void DrawTriangles() override; - void NotifyPicaRegisterChanged(u32 id) override; - void FlushAll() override; - void FlushRegion(PAddr addr, u32 size) override; - void InvalidateRegion(PAddr addr, u32 size) override; - void FlushAndInvalidateRegion(PAddr addr, u32 size) override; - void ClearAll(bool flush) override; - bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; - bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; - bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; - bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, - u32 pixel_stride, Vulkan::ScreenInfo& screen_info) override; - bool AccelerateDrawBatch(bool is_indexed) override { return false; } - - /// Syncs entire status to match PICA registers - void SyncEntireState() override; - -private: - 
/// Syncs the clip enabled status to match the PICA register - void SyncClipEnabled(); - - /// Syncs the clip coefficients to match the PICA register - void SyncClipCoef(); - - /// Sets the OpenGL shader in accordance with the current PICA register state - void SetShader(); - - /// Syncs the cull mode to match the PICA register - void SyncCullMode(); - - /// Syncs the depth scale to match the PICA register - void SyncDepthScale(); - - /// Syncs the depth offset to match the PICA register - void SyncDepthOffset(); - - /// Syncs the blend enabled status to match the PICA register - void SyncBlendEnabled(); - - /// Syncs the blend functions to match the PICA register - void SyncBlendFuncs(); - - /// Syncs the blend color to match the PICA register - void SyncBlendColor(); - - /// Syncs the fog states to match the PICA register - void SyncFogColor(); - - /// Sync the procedural texture noise configuration to match the PICA register - void SyncProcTexNoise(); - - /// Sync the procedural texture bias configuration to match the PICA register - void SyncProcTexBias(); - - /// Syncs the alpha test states to match the PICA register - void SyncAlphaTest(); - - /// Syncs the logic op states to match the PICA register - void SyncLogicOp(); - - /// Syncs the color write mask to match the PICA register state - void SyncColorWriteMask(); - - /// Syncs the stencil write mask to match the PICA register state - void SyncStencilWriteMask(); - - /// Syncs the depth write mask to match the PICA register state - void SyncDepthWriteMask(); - - /// Syncs the stencil test states to match the PICA register - void SyncStencilTest(); - - /// Syncs the depth test states to match the PICA register - void SyncDepthTest(); - - /// Syncs the TEV combiner color buffer to match the PICA register - void SyncCombinerColor(); - - /// Syncs the TEV constant color to match the PICA register - void SyncTevConstColor(std::size_t tev_index, - const Pica::TexturingRegs::TevStageConfig& tev_stage); - - /// 
Syncs the lighting global ambient color to match the PICA register - void SyncGlobalAmbient(); - - /// Syncs the specified light's specular 0 color to match the PICA register - void SyncLightSpecular0(int light_index); - - /// Syncs the specified light's specular 1 color to match the PICA register - void SyncLightSpecular1(int light_index); - - /// Syncs the specified light's diffuse color to match the PICA register - void SyncLightDiffuse(int light_index); - - /// Syncs the specified light's ambient color to match the PICA register - void SyncLightAmbient(int light_index); - - /// Syncs the specified light's position to match the PICA register - void SyncLightPosition(int light_index); - - /// Syncs the specified spot light direcition to match the PICA register - void SyncLightSpotDirection(int light_index); - - /// Syncs the specified light's distance attenuation bias to match the PICA register - void SyncLightDistanceAttenuationBias(int light_index); - - /// Syncs the specified light's distance attenuation scale to match the PICA register - void SyncLightDistanceAttenuationScale(int light_index); - - /// Syncs the shadow rendering bias to match the PICA register - void SyncShadowBias(); - - /// Syncs the shadow texture bias to match the PICA register - void SyncShadowTextureBias(); - - /// Syncs and uploads the lighting, fog and proctex LUTs - void SyncAndUploadLUTs(); - void SyncAndUploadLUTsLF(); - - /// Upload the uniform blocks to the uniform buffer object - void UploadUniforms(bool accelerate_draw); - - /// Generic draw function for DrawTriangles and AccelerateDrawBatch - bool Draw(bool accelerate, bool is_indexed); - - struct VertexArrayInfo { - u32 vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; - }; - -private: - CommandScheduler& scheduler; - RasterizerCacheVulkan res_cache; - std::vector vertex_batch; - bool shader_dirty = true; - - struct { - UniformData data; - std::array lighting_lut_dirty; - bool lighting_lut_dirty_any; - bool 
fog_lut_dirty; - bool proctex_noise_lut_dirty; - bool proctex_color_map_dirty; - bool proctex_alpha_map_dirty; - bool proctex_lut_dirty; - bool proctex_diff_lut_dirty; - bool dirty; - } uniform_block_data = {}; - - StreamBuffer vertex_buffer, index_buffer; - StreamBuffer uniform_buffer, texture_buffer_lut_lf, texture_buffer_lut; - - u32 uniform_buffer_alignment; - u32 uniform_size_aligned_vs, uniform_size_aligned_fs; - - std::array, - Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; - std::array fog_lut_data{}; - std::array proctex_noise_lut_data{}; - std::array proctex_color_map_data{}; - std::array proctex_alpha_map_data{}; - std::array proctex_lut_data{}; - std::array proctex_diff_lut_data{}; - - bool allow_shadow{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp deleted file mode 100644 index 160496023..000000000 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp +++ /dev/null @@ -1,1513 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common/alignment.h" -#include "common/bit_field.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/vector_math.h" -#include "core/memory.h" -#include "video_core/pica_state.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" -#include "video_core/renderer_vulkan/vk_rasterizer_cache.h" -#include "video_core/renderer_vulkan/vk_format_reinterpreter.h" -#include "video_core/renderer_vulkan/vk_state.h" -#include "video_core/utils.h" -#include "video_core/video_core.h" - -namespace Vulkan { - -using SurfaceType = SurfaceParams::SurfaceType; -using PixelFormat = SurfaceParams::PixelFormat; - -static constexpr std::array fb_format_tuples = {{ - vk::Format::eR8G8B8A8Unorm, // RGBA8 - vk::Format::eR8G8B8Unorm, // RGB8 - vk::Format::eR5G5B5A1UnormPack16, // RGB5A1 - vk::Format::eR5G6B5UnormPack16, // RGB565 - vk::Format::eR4G4B4A4UnormPack16, // RGBA4 -}}; - -static constexpr std::array depth_format_tuples = {{ - vk::Format::eD16Unorm, // D16 - vk::Format::eUndefined, - vk::Format::eD24UnormS8Uint, // D24 - vk::Format::eD24UnormS8Uint, // D24S8 -}}; - -vk::Format GetFormatTuple(PixelFormat pixel_format) { - const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); - if (type == SurfaceType::Color) { - ASSERT(static_cast(pixel_format) < fb_format_tuples.size()); - return fb_format_tuples[static_cast(pixel_format)]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - std::size_t tuple_idx = static_cast(pixel_format) - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - return depth_format_tuples[tuple_idx]; - } - return vk::Format::eR8G8B8A8Unorm; -} - -template -static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -template -static void MortonCopyTile(u32 stride, 
u8* tile_buffer, u8* gpu_buffer) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 vk_bytes_per_pixel = CachedSurface::GetBytesPerPixel(format); - for (u32 y = 0; y < 8; ++y) { - for (u32 x = 0; x < 8; ++x) { - u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; - u8* gpu_ptr = gpu_buffer + ((7 - y) * stride + x) * vk_bytes_per_pixel; - if constexpr (morton_to_gl) { - if constexpr (format == PixelFormat::D24S8) { - gpu_ptr[0] = tile_ptr[3]; - std::memcpy(gpu_ptr + 1, tile_ptr, 3); - } else if (format == PixelFormat::RGBA8) { - gpu_ptr[0] = tile_ptr[3]; - gpu_ptr[1] = tile_ptr[2]; - gpu_ptr[2] = tile_ptr[1]; - gpu_ptr[3] = tile_ptr[0]; - } else if (format == PixelFormat::RGB8) { - gpu_ptr[0] = tile_ptr[2]; - gpu_ptr[1] = tile_ptr[1]; - gpu_ptr[2] = tile_ptr[0]; - } else { - std::memcpy(gpu_ptr, tile_ptr, bytes_per_pixel); - } - } else { - if constexpr (format == PixelFormat::D24S8) { - std::memcpy(tile_ptr, gpu_ptr + 1, 3); - tile_ptr[3] = gpu_ptr[0]; - } else if (format == PixelFormat::RGBA8) { - // because GLES does not have ABGR format - // so we will do byteswapping here - tile_ptr[0] = gpu_ptr[3]; - tile_ptr[1] = gpu_ptr[2]; - tile_ptr[2] = gpu_ptr[1]; - tile_ptr[3] = gpu_ptr[0]; - } else if (format == PixelFormat::RGB8) { - tile_ptr[0] = gpu_ptr[2]; - tile_ptr[1] = gpu_ptr[1]; - tile_ptr[2] = gpu_ptr[0]; - } else { - std::memcpy(tile_ptr, gpu_ptr, bytes_per_pixel); - } - } - } - } -} - -template -static void MortonCopy(u32 stride, u32 height, u8* gpu_buffer, PAddr base, PAddr start, PAddr end) { - constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 tile_size = bytes_per_pixel * 64; - - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetBytesPerPixel(format); - static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); - gpu_buffer += gl_bytes_per_pixel - bytes_per_pixel; - - const PAddr aligned_down_start = base + Common::AlignDown(start - base, 
tile_size); - const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); - - ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); - - const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; - u32 x = (begin_pixel_index % (stride * 8)) / 8; - u32 y = (begin_pixel_index / (stride * 8)) * 8; - - gpu_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; - - auto gpubuf_next_tile = [&] { - x = (x + 8) % stride; - gpu_buffer += 8 * gl_bytes_per_pixel; - if (!x) { - y += 8; - gpu_buffer -= stride * 9 * gl_bytes_per_pixel; - } - }; - - u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start); - - if (start < aligned_start && !morton_to_gl) { - std::array tmp_buf; - MortonCopyTile(stride, &tmp_buf[0], gpu_buffer); - std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], - std::min(aligned_start, end) - start); - - tile_buffer += aligned_start - start; - gpubuf_next_tile(); - } - - const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; - PAddr current_paddr = aligned_start; - while (tile_buffer < buffer_end) { - // Pokemon Super Mystery Dungeon will try to use textures that go beyond - // the end address of VRAM. 
Stop reading if reaches invalid address - if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) || - !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) { - LOG_ERROR(Render_Vulkan, "Out of bound texture"); - break; - } - MortonCopyTile(stride, tile_buffer, gpu_buffer); - tile_buffer += tile_size; - current_paddr += tile_size; - gpubuf_next_tile(); - } - - if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { - std::array tmp_buf; - MortonCopyTile(stride, &tmp_buf[0], gpu_buffer); - std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); - } -} - -static constexpr std::array morton_to_gpu_fns = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, // 5 - 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 -}; - -static constexpr std::array gpu_to_morton_fns = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, // 5 - 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 -}; - -inline vk::ImageSubresourceRange SubResourceLayersToRange(const vk::ImageSubresourceLayers& in) { - vk::ImageSubresourceRange out; - out.aspectMask = in.aspectMask; - out.baseArrayLayer = in.baseArrayLayer; - out.layerCount = in.layerCount; - out.baseMipLevel = in.mipLevel; - out.levelCount = 1; - return out; -} - -static bool BlitTextures(const Surface& src_surface, const Common::Rectangle& src_rect, - const Surface& dst_surface, const Common::Rectangle& dst_rect, - u32 src_level = 0, u32 dst_level = 0) { - vk::ImageSubresourceLayers src_range{{}, src_level, 0, 1}; - vk::ImageSubresourceLayers dst_range{{}, dst_level, 0, 1}; - - auto GetAspect = [](SurfaceType type) -> vk::ImageAspectFlags { - switch (type) { - 
case SurfaceParams::SurfaceType::Color: - case SurfaceParams::SurfaceType::Texture: - return vk::ImageAspectFlagBits::eColor; - case SurfaceParams::SurfaceType::Depth: - return vk::ImageAspectFlagBits::eDepth; - case SurfaceParams::SurfaceType::DepthStencil: - return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - default: - UNIMPLEMENTED(); - return vk::ImageAspectFlagBits::eNone; - } - }; - - src_range.aspectMask = GetAspect(src_surface->type); - dst_range.aspectMask = GetAspect(dst_surface->type); - - // Prepare images for transfer - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - - auto& src_texture = src_surface->texture; - src_texture.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal); - - auto& dst_texture = dst_surface->texture; - dst_texture.Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - - const std::array src_offsets{ - vk::Offset3D{static_cast(src_rect.left), static_cast(src_rect.bottom), 0}, - vk::Offset3D{static_cast(src_rect.right), static_cast(src_rect.top), 1} - }; - - const std::array dst_offsets{ - vk::Offset3D{static_cast(dst_rect.left), static_cast(dst_rect.bottom), 0}, - vk::Offset3D{static_cast(dst_rect.right), static_cast(dst_rect.top), 1} - }; - - vk::ImageBlit blit_area{src_range, src_offsets, dst_range, dst_offsets}; - cmdbuffer.blitImage(src_texture.GetHandle(), vk::ImageLayout::eTransferSrcOptimal, - dst_texture.GetHandle(), vk::ImageLayout::eTransferDstOptimal, - {blit_area}, vk::Filter::eNearest); - - // Revert changes to the layout - src_texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - dst_texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - - return true; -} - -static bool FillSurface(const Surface& surface, std::array fill_buffer, - Common::Rectangle rect) { - if (surface->GetScaledRect() != rect) { - // TODO: use vkCmdClearAttachments to clear subrects - LOG_ERROR(Render_Vulkan, "Partial surface fills not implemented"); - 
return false; - } - - vk::ImageSubresourceRange image_range{{}, 0, 1, 0, 1}; - switch (surface->type) { - case SurfaceParams::SurfaceType::Color: - case SurfaceParams::SurfaceType::Texture: - image_range.aspectMask = vk::ImageAspectFlagBits::eColor; - break; - case SurfaceParams::SurfaceType::Depth: - image_range.aspectMask = vk::ImageAspectFlagBits::eDepth; - break; - case SurfaceParams::SurfaceType::DepthStencil: - image_range.aspectMask = - vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - break; - default: - UNIMPLEMENTED(); - } - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - switch (surface->type) { - case SurfaceParams::SurfaceType::Color: - case SurfaceParams::SurfaceType::Texture: { - Pica::Texture::TextureInfo tex_info{}; - tex_info.format = static_cast(surface->pixel_format); - - auto color_vec = Pica::Texture::LookupTexture(fill_buffer.data(), 0, 0, tex_info) / 255.0f; - const std::array color{color_vec.x, color_vec.y, color_vec.z, color_vec.w}; - - auto& texture = surface->texture; - texture.Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - - cmdbuffer.clearColorImage(texture.GetHandle(), vk::ImageLayout::eTransferDstOptimal, - color, image_range); - texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - return true; - } - case SurfaceParams::SurfaceType::Depth: - case SurfaceParams::SurfaceType::DepthStencil: { - auto& texture = surface->texture; - texture.Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - - u32 value_32bit = 0; - vk::ClearDepthStencilValue clear_value; - - if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { - std::memcpy(&value_32bit, fill_buffer.data(), sizeof(u16)); - clear_value.depth = value_32bit / 65535.0f; // 2^16 - 1 - } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { - std::memcpy(&value_32bit, fill_buffer.data(), 3); - clear_value.depth = value_32bit / 16777215.0f; // 2^24 - 1 - } else { - 
std::memcpy(&value_32bit, fill_buffer.data(), sizeof(u32)); - clear_value.depth = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - clear_value.stencil = value_32bit >> 24; - } - - cmdbuffer.clearDepthStencilImage(texture.GetHandle(), vk::ImageLayout::eTransferDstOptimal, - clear_value, image_range); - texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); - return true; - } - default: - LOG_CRITICAL(Render_Vulkan, "Unsupported fill operation requested!"); - return false; - } -} - -static vk::Rect2D FromRect(Common::Rectangle rect) { - vk::Offset2D offset{static_cast(rect.left), static_cast(rect.bottom)}; - vk::Extent2D extent{rect.GetWidth(), rect.GetHeight()}; - return vk::Rect2D{offset, extent}; -} - -// Allocate an uninitialized texture of appropriate size and format for the surface -void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, vk::Format format, - u32 width, u32 height, bool framebuffer) { - // First check if the texture can be recycled - auto recycled_tex = host_texture_recycler.find({format, width, height}); - if (recycled_tex != host_texture_recycler.end()) { - target = std::move(recycled_tex->second); - host_texture_recycler.erase(recycled_tex); - return; - } - - auto GetUsage = [framebuffer](SurfaceType type) { - auto usage = vk::ImageUsageFlagBits::eSampled | - vk::ImageUsageFlagBits::eTransferDst | - vk::ImageUsageFlagBits::eTransferSrc; - - if (framebuffer) { - switch (type) { - case SurfaceType::Color: - case SurfaceType::Fill: - case SurfaceType::Texture: - usage |= vk::ImageUsageFlagBits::eColorAttachment; - break; - case SurfaceType::Depth: - case SurfaceType::DepthStencil: - usage |= vk::ImageUsageFlagBits::eDepthStencilAttachment; - break; - default: - break; - } - } - return usage; - }; - - // Otherwise create a brand new texture - u32 levels = static_cast(std::log2(std::max(width, height))) + 1; - Texture::Info texture_info{ - .width = width, - .height = height, - .format = format, - .type 
= vk::ImageType::e2D, - .view_type = vk::ImageViewType::e2D, - .usage = GetUsage(type), - .levels = levels - }; - - auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); - - target.Destroy(); - target.Create(texture_info); - target.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); -} - -CachedSurface::~CachedSurface() { - if (texture.IsValid()) { - auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8), - custom_tex_info.width, custom_tex_info.height} - : HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(), - GetScaledHeight()}; - - owner.host_texture_recycler.emplace(tag, std::move(texture)); - } -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * 8 != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); - std::vector fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == 
copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -MICROPROFILE_DEFINE(Vulkan_CopySurface, "Vulkan", "CopySurface", MP_RGB(128, 192, 64)); -void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - MICROPROFILE_SCOPE(Vulkan_CopySurface); - - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - - ASSERT(src_surface != dst_surface); - - // This is only called when CanCopy is true, no need to run checks here - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u32 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array fill_buffer; - - u32 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, fill_buffer, dst_surface->GetScaledSubRect(subrect_params)); - return; - } - if (src_surface->CanSubRect(subrect_params)) { - BlitTextures(src_surface, src_surface->GetScaledSubRect(subrect_params), - dst_surface, dst_surface->GetScaledSubRect(subrect_params)); - return; - } - - UNREACHABLE(); -} - -MICROPROFILE_DEFINE(Vulkan_SurfaceLoad, "Vulkan", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) { - ASSERT(type != SurfaceType::Fill); - const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); - - const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr); - if (texture_src_data == nullptr) { - return; - } - - if (vk_buffer.empty()) { - vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); - } - - // TODO: Should probably be done in ::Memory:: and check for other regions too - if 
(load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) - load_end = Memory::VRAM_VADDR_END; - - if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) - load_start = Memory::VRAM_VADDR; - - MICROPROFILE_SCOPE(Vulkan_SurfaceLoad); - - ASSERT(load_start >= addr && load_end <= end); - const u32 start_offset = load_start - addr; - - if (!is_tiled) { - ASSERT(type == SurfaceType::Color); - if (need_swap) { - // TODO(liushuyu): check if the byteswap here is 100% correct - // cannot fully test this - if (pixel_format == PixelFormat::RGBA8) { - for (std::size_t i = start_offset; i < load_end - addr; i += 4) { - vk_buffer[i] = texture_src_data[i + 3]; - vk_buffer[i + 1] = texture_src_data[i + 2]; - vk_buffer[i + 2] = texture_src_data[i + 1]; - vk_buffer[i + 3] = texture_src_data[i]; - } - } else if (pixel_format == PixelFormat::RGB8) { - for (std::size_t i = start_offset; i < load_end - addr; i += 3) { - vk_buffer[i] = texture_src_data[i + 2]; - vk_buffer[i + 1] = texture_src_data[i + 1]; - vk_buffer[i + 2] = texture_src_data[i]; - } - } - } else { - std::memcpy(&vk_buffer[start_offset], texture_src_data + start_offset, - load_end - load_start); - } - } else { - if (type == SurfaceType::Texture) { - Pica::Texture::TextureInfo tex_info{}; - tex_info.width = width; - tex_info.height = height; - tex_info.format = static_cast(pixel_format); - tex_info.SetDefaultStride(); - tex_info.physical_address = addr; - - const SurfaceInterval load_interval(load_start, load_end); - const auto rect = GetSubRect(FromInterval(load_interval)); - ASSERT(FromInterval(load_interval).GetInterval() == load_interval); - - for (unsigned y = rect.bottom; y < rect.top; ++y) { - for (unsigned x = rect.left; x < rect.right; ++x) { - auto vec4 = - Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); - const std::size_t offset = (x + (width * y)) * 4; - std::memcpy(&vk_buffer[offset], vec4.AsArray(), 4); - } - } - } else { - 
morton_to_gpu_fns[static_cast(pixel_format)](stride, height, &vk_buffer[0], - addr, load_start, load_end); - } - } -} - -MICROPROFILE_DEFINE(Vulkan_SurfaceFlush, "Vulkan", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGPUBuffer(PAddr flush_start, PAddr flush_end) { - u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); - if (dst_buffer == nullptr) - return; - - ASSERT(vk_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); - - // TODO: Should probably be done in ::Memory:: and check for other regions too - // same as loadglbuffer() - if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) - flush_end = Memory::VRAM_VADDR_END; - - if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) - flush_start = Memory::VRAM_VADDR; - - MICROPROFILE_SCOPE(Vulkan_SurfaceFlush); - - ASSERT(flush_start >= addr && flush_end <= end); - const u32 start_offset = flush_start - addr; - const u32 end_offset = flush_end - addr; - - if (type == SurfaceType::Fill) { - const u32 coarse_start_offset = start_offset - (start_offset % fill_size); - const u32 backup_bytes = start_offset % fill_size; - std::array backup_data; - if (backup_bytes) - std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - - for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { - std::memcpy(&dst_buffer[offset], &fill_data[0], - std::min(fill_size, end_offset - offset)); - } - - if (backup_bytes) - std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - if (pixel_format == PixelFormat::RGBA8) { - for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { - dst_buffer[i] = vk_buffer[i + 3]; - dst_buffer[i + 1] = vk_buffer[i + 2]; - dst_buffer[i + 2] = vk_buffer[i + 1]; - dst_buffer[i + 3] = vk_buffer[i]; - } - } else if (pixel_format == PixelFormat::RGB8) { - for (std::size_t i = start_offset; i < flush_end - addr; i += 
3) { - dst_buffer[i] = vk_buffer[i + 2]; - dst_buffer[i + 1] = vk_buffer[i + 1]; - dst_buffer[i + 2] = vk_buffer[i]; - } - } else { - std::memcpy(dst_buffer + start_offset, &vk_buffer[start_offset], - flush_end - flush_start); - } - } else { - gpu_to_morton_fns[static_cast(pixel_format)](stride, height, &vk_buffer[0], - addr, flush_start, flush_end); - } -} - -MICROPROFILE_DEFINE(Vulkan_TextureUL, "Vulkan", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGPUTexture(Common::Rectangle rect) { - if (type == SurfaceType::Fill) - return; - - MICROPROFILE_SCOPE(Vulkan_TextureUL); - - ASSERT(vk_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); - - // TODO: Handle resolution scaling and custom textures - - // Load data from memory to the surface - auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); - auto update_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format); - std::span memory{vk_buffer.data() + buffer_offset, update_size}; - - texture.Upload(0, 0, stride, memory); - - InvalidateAllWatcher(); -} - -MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGPUTexture(const Common::Rectangle& rect) { - if (type == SurfaceType::Fill) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_TextureDL); - - if (vk_buffer.empty()) { - vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); - } - - // TODO: Handle resolution scaling and custom textures - - auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); - auto buffer_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format); - std::span memory(vk_buffer.data() + buffer_offset, buffer_size); - - texture.Download(0, 0, stride, FromRect(rect), memory); -} - -enum MatchFlags { - Invalid = 1, // Flag that can be applied to other match types, invalid matches require - // validation before they can be used - Exact = 
1 << 1, // Surfaces perfectly match - SubRect = 1 << 2, // Surface encompasses params - Copy = 1 << 3, // Surface we can copy from - Expand = 1 << 4, // Surface that can expand params - TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters -}; - -static constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -/// Get the best surface match (and its match type) for the given flags -template -static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - std::optional validate_interval = std::nullopt) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (const auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - find_flags & MatchFlags::Copy - ? 
true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if (!(find_flags & MatchFlags::Invalid) && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (!(find_flags & check_type)) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - ASSERT(validate_interval); - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } - } - 
return match_surface; -} - -RasterizerCacheVulkan::RasterizerCacheVulkan() { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - //format_reinterpreter = std::make_unique(); -} - -RasterizerCacheVulkan::~RasterizerCacheVulkan() { -#ifndef ANDROID - // This is for switching renderers, which is unsupported on Android, and costly on shutdown - ClearAll(false); -#endif -} - -MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); -bool RasterizerCacheVulkan::BlitSurfaces(const Surface& src_surface, - const Common::Rectangle& src_rect, - const Surface& dst_surface, - const Common::Rectangle& dst_rect) { - MICROPROFILE_SCOPE(OpenGL_BlitSurface); - - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - dst_surface->InvalidateAllWatcher(); - return BlitTextures(src_surface, src_rect, dst_surface, dst_rect); -} - -Surface RasterizerCacheVulkan::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find - // it to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == 
PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create, - bool framebuffer) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, Common::Rectangle{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params, framebuffer); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = 
aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = - new_params.size / aligned_params.BytesInPixels(aligned_params.stride); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); -} - -Surface RasterizerCacheVulkan::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - return GetTextureSurface(info, config.config.lod.max_level); -} - -Surface RasterizerCacheVulkan::GetTextureSurface(const Pica::Texture::TextureInfo& info, - u32 max_level) { - if (info.physical_address == 0) { - return nullptr; - } - - SurfaceParams params; - params.addr = info.physical_address; - params.width = info.width; - params.height = 
info.height; - params.is_tiled = true; - params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); - //params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor; - params.res_scale = 1; - params.UpdateParams(); - - u32 min_width = info.width >> max_level; - u32 min_height = info.height >> max_level; - if (min_width % 8 != 0 || min_height % 8 != 0) { - LOG_CRITICAL(Render_Vulkan, "Texture size ({}x{}) is not multiple of 8", min_width, - min_height); - return nullptr; - } - if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { - LOG_CRITICAL(Render_Vulkan, - "Texture size ({}x{}) does not support required mipmap level ({})", - params.width, params.height, max_level); - return nullptr; - } - - auto surface = GetSurface(params, ScaleMatch::Ignore, true); - if (!surface) - return nullptr; - - // Update mipmap if necessary - if (max_level != 0) { - if (max_level >= 8) { - // since PICA only supports texture size between 8 and 1024, there are at most eight - // possible mipmap levels including the base. 
- LOG_CRITICAL(Render_Vulkan, "Unsupported mipmap level {}", max_level); - return nullptr; - } - - SurfaceParams surface_params = *surface; - for (u32 level = 1; level <= max_level; ++level) { - // In PICA all mipmap levels are stored next to each other - surface_params.addr += - surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; - surface_params.width /= 2; - surface_params.height /= 2; - surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it - surface_params.UpdateParams(); - auto& watcher = surface->level_watchers[level - 1]; - if (!watcher || !watcher->Get()) { - auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); - if (level_surface) { - watcher = level_surface->CreateWatcher(); - } else { - watcher = nullptr; - } - } - - if (watcher && !watcher->IsValid()) { - auto level_surface = watcher->Get(); - if (!level_surface->invalid_regions.empty()) { - ValidateSurface(level_surface, level_surface->addr, level_surface->size); - } - - if (!surface->is_custom /*&& texture_filterer->IsNull()*/) { - BlitTextures(level_surface, level_surface->GetScaledRect(), - surface, surface_params.GetScaledRect(), - 0, level); - } - watcher->Validate(); - } - } - } - - return surface; -} - -SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const Common::Rectangle& viewport_rect) { - const auto& regs = Pica::g_state.regs; - const auto& config = regs.framebuffer.framebuffer; - - // update resolution_scale_factor and reset cache if changed - /*if ((resolution_scale_factor != VideoCore::GetResolutionScaleFactor()) | - (VideoCore::g_texture_filter_update_requested.exchange(false) && - texture_filterer->Reset(Settings::values.texture_filter_name, resolution_scale_factor))) { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); 
- texture_cube_cache.clear(); - }*/ - - Common::Rectangle viewport_clamped{ - static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), - static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), - static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), - static_cast( - std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; - - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.GetWidth(); - color_params.height = config.GetHeight(); - SurfaceParams depth_params = color_params; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); - color_params.UpdateParams(); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_Vulkan, "Color and depth framebuffer memory regions overlap!"); - using_depth_fb = false; - } - - Common::Rectangle color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false, true); - - Common::Rectangle depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false, true); - - Common::Rectangle fb_rect{}; - if (color_surface != 
nullptr && depth_surface != nullptr) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || - color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); - } - } else if (color_surface != nullptr) { - fb_rect = color_rect; - } else if (depth_surface != nullptr) { - fb_rect = depth_rect; - } - - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - color_surface->InvalidateAllWatcher(); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - depth_surface->InvalidateAllWatcher(); - } - - return std::make_tuple(color_surface, depth_surface, fb_rect); -} - -Surface RasterizerCacheVulkan::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - Surface new_surface = std::make_shared(*this); - - new_surface->addr = config.GetStartAddress(); - new_surface->end = config.GetEndAddress(); - new_surface->size = new_surface->end - new_surface->addr; - new_surface->type = SurfaceType::Fill; - new_surface->res_scale = std::numeric_limits::max(); - - std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); - if (config.fill_32bit) { - new_surface->fill_size = 4; - } else if (config.fill_24bit) { - new_surface->fill_size = 3; - } else { - new_surface->fill_size = 2; - } - - RegisterSurface(new_surface); - return new_surface; -} - -SurfaceRect_Tuple RasterizerCacheVulkan::GetTexCopySurface(const SurfaceParams& params) { - Common::Rectangle rect{}; - - Surface match_surface = FindMatch( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != 
nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; - match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCacheVulkan::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (const auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCacheVulkan::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { - if (size == 0) - return; - - const SurfaceInterval validate_interval(addr, addr + size); - - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - auto validate_regions = surface->invalid_regions & validate_interval; - auto notify_validated = [&](SurfaceInterval interval) { - surface->invalid_regions.erase(interval); - 
validate_regions.erase(interval); - }; - - while (true) { - const auto it = validate_regions.begin(); - if (it == validate_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = surface->FromInterval(interval); - - Surface copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - notify_validated(copy_interval); - continue; - } - - // Try to find surface in cache with different format - // that can can be reinterpreted to the requested format. - if (ValidateByReinterpretation(surface, params, interval)) { - notify_validated(interval); - continue; - } - // Could not find a matching reinterpreter, check if we need to implement a - // reinterpreter - if (NoUnimplementedReinterpretations(surface, params, interval) && - !IntervalHasInvalidPixelFormat(params, interval)) { - // No surfaces were found in the cache that had a matching bit-width. - // If the region was created entirely on the GPU, - // assume it was a developer mistake and skip flushing. - if (boost::icl::contains(dirty_regions, interval)) { - LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " - "invalid. 
Skipping validation"); - validate_regions.erase(interval); - continue; - } - } - - // Load data from 3DS memory - FlushRegion(params.addr, params.size); - surface->LoadGPUBuffer(params.addr, params.end); - surface->UploadGPUTexture(surface->GetSubRect(params)); - notify_validated(params.GetInterval()); - } -} - -bool RasterizerCacheVulkan::NoUnimplementedReinterpretations(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval) { - static constexpr std::array all_formats{ - PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, - PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, - PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, - PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, - PixelFormat::D24S8, - }; - bool implemented = true; - for (PixelFormat format : all_formats) { - if (SurfaceParams::GetFormatBpp(format) == surface->GetFormatBpp()) { - params.pixel_format = format; - // This could potentially be expensive, - // although experimentally it hasn't been too bad - Surface test_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (test_surface != nullptr) { - LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", - SurfaceParams::PixelFormatAsString(format), - SurfaceParams::PixelFormatAsString(surface->pixel_format)); - implemented = false; - } - } - } - return implemented; -} - -bool RasterizerCacheVulkan::IntervalHasInvalidPixelFormat(SurfaceParams& params, - const SurfaceInterval& interval) { - params.pixel_format = PixelFormat::Invalid; - for (const auto& set : RangeFromInterval(surface_cache, interval)) - for (const auto& surface : set.second) - if (surface->pixel_format == PixelFormat::Invalid) { - LOG_WARNING(Render_OpenGL, "Surface found with invalid pixel format"); - return true; - } - return false; -} - -bool RasterizerCacheVulkan::ValidateByReinterpretation(const Surface& surface, 
- SurfaceParams& params, - const SurfaceInterval& interval) { - /*auto [cvt_begin, cvt_end] = - format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format); - for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) { - PixelFormat format = reinterpreter->first.src_format; - params.pixel_format = format; - Surface reinterpret_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - - if (reinterpret_surface != nullptr) { - SurfaceInterval reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); - SurfaceParams reinterpret_params = surface->FromInterval(reinterpret_interval); - auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); - auto dest_rect = surface->GetScaledSubRect(reinterpret_params); - - reinterpreter->second->Reinterpret(reinterpret_surface, src_rect, surface, dest_rect); - return true; - } - }*/ - return false; -} - -void RasterizerCacheVulkan::ClearAll(bool flush) { - const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); - // Force flush all surfaces from the cache - if (flush) { - FlushRegion(0x0, 0xFFFFFFFF); - } - // Unmark all of the marked pages - for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { - const auto interval = pair.first & flush_interval; - - const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; - const u32 interval_size = interval_end_addr - interval_start_addr; - - VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, false); - } - - // Remove the whole cache without really looking at it. 
- cached_pages -= flush_interval; - dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); - surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); - remove_surfaces.clear(); -} - -void RasterizerCacheVulkan::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { - std::lock_guard lock{mutex}; - - if (size == 0) - return; - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to - // access that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGPUTexture(surface->GetSubRect(params)); - } - - surface->FlushGPUBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); - flushed_intervals += interval; - } - // Reset dirty regions - dirty_regions -= flushed_intervals; -} - -void RasterizerCacheVulkan::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); -} - -void RasterizerCacheVulkan::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { - std::lock_guard lock{mutex}; - - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - 
region_owner->invalid_regions.erase(invalid_interval); - } - - for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (const auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - cached_surface->InvalidateAllWatcher(); - - // If the surface has no salvageable data it should be removed from the cache to avoid - // clogging the data structure - if (cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } - } - - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (const auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); - } - - remove_surfaces.clear(); -} - -Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params, bool framebuffer) { - Surface surface = std::make_shared(*this); - static_cast(*surface) = params; - - surface->invalid_regions.insert(surface->GetInterval()); - AllocateTexture(surface->texture, params.type, GetFormatTuple(surface->pixel_format), - surface->GetScaledWidth(), surface->GetScaledHeight(), framebuffer); - return surface; -} - -void 
RasterizerCacheVulkan::RegisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCacheVulkan::UnregisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -void RasterizerCacheVulkan::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { - const u32 num_pages = - ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; - const u32 page_start = addr >> Memory::PAGE_BITS; - const u32 page_end = page_start + num_pages; - - // Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) - cached_pages.add({pages_interval, delta}); - - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; - - const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; - const u32 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) - VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, - true); - else if (delta < 0 && count == -delta) - VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, - false); - else - ASSERT(count >= 0); - } - - if (delta < 0) - cached_pages.add({pages_interval, delta}); -} - -} // namespace OpenGL diff 
--git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h deleted file mode 100644 index 86c01d3e5..000000000 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h +++ /dev/null @@ -1,346 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common/assert.h" -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "core/custom_tex_cache.h" -#include "video_core/renderer_vulkan/vk_surface_params.h" -#include "video_core/renderer_vulkan/vk_texture.h" -#include "video_core/texture/texture_decode.h" - -// Can be changed later here -template -using HashMap = robin_hood::unordered_flat_map; - -namespace Vulkan { - -class RasterizerCacheVulkan; -class TextureFilterer; -class FormatReinterpreterVulkan; - -vk::Format GetFormatTuple(SurfaceParams::PixelFormat pixel_format); - -struct HostTextureTag { - vk::Format format = vk::Format::eUndefined; - u32 width = 0, height = 0; - - // Enable comparisons - auto operator<=>(const HostTextureTag& other) const = default; -}; - -struct TextureCubeConfig { - PAddr px = 0; - PAddr nx = 0; - PAddr py = 0; - PAddr ny = 0; - PAddr pz = 0; - PAddr nz = 0; - u32 width = 0; - Pica::TexturingRegs::TextureFormat format; - - // Enable comparisons - auto operator<=>(const TextureCubeConfig& other) const = default; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::HostTextureTag& tag) const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, tag.format); - boost::hash_combine(hash, tag.width); - boost::hash_combine(hash, tag.height); - return hash; - } -}; - -template <> -struct hash { - std::size_t operator()(const Vulkan::TextureCubeConfig& config) const noexcept { - 
std::size_t hash = 0; - boost::hash_combine(hash, config.px); - boost::hash_combine(hash, config.nx); - boost::hash_combine(hash, config.py); - boost::hash_combine(hash, config.ny); - boost::hash_combine(hash, config.pz); - boost::hash_combine(hash, config.nz); - boost::hash_combine(hash, config.width); - boost::hash_combine(hash, static_cast(config.format)); - return hash; - } -}; -} // namespace std - -namespace Vulkan { - -using SurfaceSet = std::set; - -using SurfaceRegions = boost::icl::interval_set; -using SurfaceMap = - boost::icl::interval_map; - -using SurfaceCache = - boost::icl::interval_map; - -static_assert(std::is_same() && - std::is_same(), - "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple>; -using SurfaceSurfaceRect_Tuple = std::tuple>; - -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; - -/** - * A watcher that notifies whether a cached surface has been changed. This is useful for caching - * surface collection objects, including texture cube and mipmap. - */ -struct SurfaceWatcher { -public: - explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} - - /** - * Checks whether the surface has been changed. - * @return false if the surface content has been changed since last Validate() call or has been - * destroyed; otherwise true - */ - bool IsValid() const { - return !surface.expired() && valid; - } - - /// Marks that the content of the referencing surface has been updated to the watcher user. - void Validate() { - ASSERT(!surface.expired()); - valid = true; - } - - /// Gets the referencing surface. 
Returns null if the surface has been destroyed - Surface Get() const { - return surface.lock(); - } - -private: - friend struct CachedSurface; - std::weak_ptr surface; - bool valid = false; -}; - -class RasterizerCacheVulkan; - -struct CachedSurface : SurfaceParams, std::enable_shared_from_this { - CachedSurface(RasterizerCacheVulkan& owner) : owner{owner} {} - ~CachedSurface(); - - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } - - bool IsSurfaceFullyInvalid() const { - auto interval = GetInterval(); - return *invalid_regions.equal_range(interval).first == interval; - } - - bool registered = false; - SurfaceRegions invalid_regions; - - u32 fill_size = 0; /// Number of bytes to read from fill_data - std::array fill_data; - - Texture texture; - - /// max mipmap level that has been attached to the texture - u32 max_level = 0; - /// level_watchers[i] watches the (i+1)-th level mipmap source surface - std::array, 7> level_watchers; - - bool is_custom = false; - Core::CustomTexInfo custom_tex_info; - - static constexpr unsigned int GetBytesPerPixel(PixelFormat format) { - return format == PixelFormat::Invalid - ? 0 - : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) - ? 
4 - : SurfaceParams::GetFormatBpp(format) / 8; - } - - std::vector vk_buffer; - - // Read/Write data in 3DS memory to/from gl_buffer - void LoadGPUBuffer(PAddr load_start, PAddr load_end); - void FlushGPUBuffer(PAddr flush_start, PAddr flush_end); - - // Upload/Download data in vk_buffer in/to this surface's texture - void UploadGPUTexture(Common::Rectangle rect); - void DownloadGPUTexture(const Common::Rectangle& rect); - - std::shared_ptr CreateWatcher() { - auto watcher = std::make_shared(weak_from_this()); - watchers.push_front(watcher); - return watcher; - } - - void InvalidateAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - locked->valid = false; - } - } - } - - void UnlinkAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - locked->valid = false; - locked->surface.reset(); - } - } - watchers.clear(); - } - -private: - RasterizerCacheVulkan& owner; - std::list> watchers; -}; - -struct CachedTextureCube { - Texture texture; - u16 res_scale = 1; - std::shared_ptr px; - std::shared_ptr nx; - std::shared_ptr py; - std::shared_ptr ny; - std::shared_ptr pz; - std::shared_ptr nz; -}; - -class TextureDownloader; - -class RasterizerCacheVulkan : NonCopyable { -public: - RasterizerCacheVulkan(); - ~RasterizerCacheVulkan(); - - /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, - const Surface& dst_surface, const Common::Rectangle& dst_rect); - - /// Copy one surface's region to another - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval); - - /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// 3DS memory to OpenGL and 
caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create, bool framebuffer = false); - - /// Get a surface based on the texture configuration - Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); - Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); - - /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const Common::Rectangle& viewport_rect); - - /// Get a surface that matches the fill config - Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); - - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); - - /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); - - /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) - void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); - - /// Flush all cached resources tracked by this cache manager - void FlushAll(); - - /// Clear all cached resources tracked by this cache manager - void ClearAll(bool flush); - - // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead - // in the driver - // this must be placed above the surface_cache to ensure all cached surfaces are destroyed - // before destroying the recycler - std::unordered_multimap host_texture_recycler; - -private: - void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - - /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, PAddr addr, u32 size); - - // Returns false if there is a surface in the cache at the interval with the same 
bit-width, - bool NoUnimplementedReinterpretations(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval); - - // Return true if a surface with an invalid pixel format exists at the interval - bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); - - // Attempt to find a reinterpretable surface in the cache and use it to copy for validation - bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, - const SurfaceInterval& interval); - - /// Create a new surface - Surface CreateSurface(const SurfaceParams& params, bool framebuffer = false); - - /// Register surface into the cache - void RegisterSurface(const Surface& surface); - - /// Remove surface from the cache - void UnregisterSurface(const Surface& surface); - - /// Increase/decrease the number of surface in pages touching the specified region - void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); - - SurfaceCache surface_cache; - boost::icl::interval_map cached_pages; - SurfaceMap dirty_regions; - SurfaceSet remove_surfaces; - - u16 resolution_scale_factor; - - // Texture cube cache - std::unordered_map texture_cube_cache; - - std::recursive_mutex mutex; - -public: - void AllocateTexture(Texture& target, SurfaceParams::SurfaceType type, vk::Format format, - u32 width, u32 height, bool framebuffer); -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 9f915dee8..3a5126919 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -66,13 +66,11 @@ layout (set = 0, binding = 0) uniform shader_data { }; )"; -static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) { +static std::string GetVertexInterfaceDeclaration(bool is_output) { std::string out; const auto append_variable = [&](std::string_view var, int location) { - if 
(separable_shader) { - out += fmt::format("layout(location = {}) ", location); - } + out += fmt::format("layout(location = {}) ", location); out += fmt::format("{}{};\n", is_output ? "out " : "in ", var); }; @@ -84,7 +82,7 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_ append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); append_variable("vec3 view", ATTRIBUTE_VIEW); - if (is_output && separable_shader) { + if (is_output) { // gl_PerVertex redeclaration is required for separate shader object out += R"( out gl_PerVertex { @@ -1027,7 +1025,7 @@ float ProcTexNoiseCoef(vec2 x) { } } -std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, bool seperable_shader) { +std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config) { const auto& state = config; std::string out; @@ -1037,7 +1035,7 @@ std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, )"; out += "#extension GL_ARB_separate_shader_objects : enable\n"; - out += GetVertexInterfaceDeclaration(false, true); + out += GetVertexInterfaceDeclaration(false); out += R"( in vec4 gl_FragCoord; @@ -1379,7 +1377,7 @@ do { return out; } -std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) { +std::string ShaderGenerator::GenerateTrivialVertexShader() { std::string out; out += "#version 450\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n"; @@ -1394,7 +1392,7 @@ std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); - out += GetVertexInterfaceDeclaration(true, separable_shader); + out += GetVertexInterfaceDeclaration(true); out += UniformBlockDef; @@ -1419,13 +1417,12 @@ void main() { return out; } -std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const 
PicaVSConfig& config, - bool separable_shader) { +std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config) { LOG_CRITICAL(Render_Vulkan, "Unimplemented!"); UNREACHABLE(); } -std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) { LOG_CRITICAL(Render_Vulkan, "Unimplemented!"); UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 7e9775411..33568ada8 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -13,14 +13,10 @@ public: ShaderGenerator() = default; ~ShaderGenerator() override = default; - std::string GenerateTrivialVertexShader(bool separable_shader) override; - - std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, - bool separable_shader) override; - - std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) override; - - std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) override; + std::string GenerateTrivialVertexShader() override; + std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config) override; + std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) override; + std::string GenerateFragmentShader(const PicaFSConfig& config) override; }; } // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_shader_state.h b/src/video_core/renderer_vulkan/vk_shader_state.h deleted file mode 100644 index 45cbdb8bc..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_state.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include "common/hash.h" -#include "video_core/regs.h" -#include "video_core/shader/shader.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -/// Structure that the hardware rendered vertices are composed of -struct HardwareVertex { - HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { - position[0] = v.pos.x.ToFloat32(); - position[1] = v.pos.y.ToFloat32(); - position[2] = v.pos.z.ToFloat32(); - position[3] = v.pos.w.ToFloat32(); - color[0] = v.color.x.ToFloat32(); - color[1] = v.color.y.ToFloat32(); - color[2] = v.color.z.ToFloat32(); - color[3] = v.color.w.ToFloat32(); - tex_coord0[0] = v.tc0.x.ToFloat32(); - tex_coord0[1] = v.tc0.y.ToFloat32(); - tex_coord1[0] = v.tc1.x.ToFloat32(); - tex_coord1[1] = v.tc1.y.ToFloat32(); - tex_coord2[0] = v.tc2.x.ToFloat32(); - tex_coord2[1] = v.tc2.y.ToFloat32(); - tex_coord0_w = v.tc0_w.ToFloat32(); - normquat[0] = v.quat.x.ToFloat32(); - normquat[1] = v.quat.y.ToFloat32(); - normquat[2] = v.quat.z.ToFloat32(); - normquat[3] = v.quat.w.ToFloat32(); - view[0] = v.view.x.ToFloat32(); - view[1] = v.view.y.ToFloat32(); - view[2] = v.view.z.ToFloat32(); - - if (flip_quaternion) { - normquat = -normquat; - } - } - - glm::vec4 position; - glm::vec4 color; - glm::vec2 tex_coord0; - glm::vec2 tex_coord1; - glm::vec2 tex_coord2; - float tex_coord0_w; - glm::vec4 normquat; - glm::vec3 view; -}; - -/** - * Vertex structure that the drawn screen rectangles are composed of. 
- */ -struct ScreenRectVertex { - ScreenRectVertex() = default; - ScreenRectVertex(float x, float y, float u, float v, float s) { - position.x = x; - position.y = y; - tex_coord.x = u; - tex_coord.y = v; - tex_coord.z = s; - } - - glm::vec2 position; - glm::vec3 tex_coord; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state.cpp b/src/video_core/renderer_vulkan/vk_state.cpp deleted file mode 100644 index 9eff7d6b3..000000000 --- a/src/video_core/renderer_vulkan/vk_state.cpp +++ /dev/null @@ -1,700 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/renderer_vulkan/vk_state.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_gen.h" - -namespace Vulkan { - -std::unique_ptr s_vulkan_state; - -auto IsStencil = [](vk::Format format) -> bool { - switch (format) { - case vk::Format::eD16UnormS8Uint: - case vk::Format::eD24UnormS8Uint: - case vk::Format::eD32SfloatS8Uint: - return true; - default: - return false; - }; -}; - -void DescriptorUpdater::Reset() { - write_count = 0; - buffer_count = 0; - image_count = 0; -} - -void DescriptorUpdater::Update() { - assert(write_count > 0); - - auto device = g_vk_instace->GetDevice(); - device.updateDescriptorSets(write_count, writes.data(), 0, nullptr); - - Reset(); -} - -void DescriptorUpdater::PushTextureArrayUpdate(vk::DescriptorSet set, u32 binding, vk::Sampler sampler, - std::span views) { - ASSERT(image_count < MAX_UPDATES); - - u32 start = image_count; - for (auto& view : views) { - image_infos[image_count++] = {sampler, view, vk::ImageLayout::eShaderReadOnlyOptimal}; - } - - writes[write_count++] = vk::WriteDescriptorSet{set, binding, 0, static_cast(views.size()), - vk::DescriptorType::eCombinedImageSampler, - image_infos.data() + start}; -} - -void 
DescriptorUpdater::PushCombinedImageSamplerUpdate(vk::DescriptorSet set, u32 binding, - vk::Sampler sampler, vk::ImageView view) { - ASSERT(image_count < MAX_UPDATES); - - image_infos[image_count] = {sampler, view, vk::ImageLayout::eShaderReadOnlyOptimal}; - - writes[write_count++] = vk::WriteDescriptorSet{set, binding, 0, 1, - vk::DescriptorType::eCombinedImageSampler, - &image_infos[image_count++]}; -} - -void DescriptorUpdater::PushBufferUpdate(vk::DescriptorSet set, u32 binding, - vk::DescriptorType buffer_type, u32 offset, u32 size, - vk::Buffer buffer, const vk::BufferView& view) { - ASSERT(buffer_count < MAX_UPDATES); - - buffer_infos[buffer_count] = vk::DescriptorBufferInfo{buffer, offset, size}; - - writes[write_count++] = vk::WriteDescriptorSet{set, binding, 0, 1, buffer_type, nullptr, - &buffer_infos[buffer_count++], - view ? &view : nullptr}; -} - -VulkanState::VulkanState(const std::shared_ptr& swapchain) : swapchain(swapchain) { - // Create a placeholder texture which can be used in place of a real binding. 
- Texture::Info info{ - .width = 1, - .height = 1, - .format = vk::Format::eR8G8B8A8Unorm, - .type = vk::ImageType::e2D, - .view_type = vk::ImageViewType::e2D, - .usage = vk::ImageUsageFlagBits::eSampled | - vk::ImageUsageFlagBits::eTransferDst - }; - - placeholder.Create(info); - - // Create texture sampler - auto props = g_vk_instace->GetPhysicalDevice().getProperties(); - vk::SamplerCreateInfo sampler_info{ - {}, vk::Filter::eLinear, - vk::Filter::eLinear, - vk::SamplerMipmapMode::eLinear, - vk::SamplerAddressMode::eClampToEdge, - vk::SamplerAddressMode::eClampToEdge, - vk::SamplerAddressMode::eClampToEdge, - {}, true, props.limits.maxSamplerAnisotropy, - false, vk::CompareOp::eAlways, {}, {}, - vk::BorderColor::eIntOpaqueBlack, false - }; - - // TODO: Sampler cache - auto device = g_vk_instace->GetDevice(); - render_sampler = device.createSampler(sampler_info); - present_sampler = device.createSampler(sampler_info); - - // Unbind all texture units - present_view = placeholder.GetView(); - for (int i = 0; i < 4; i++) { - render_views[i] = placeholder.GetView(); - } - - // Configure descriptor sets and pipeline builders - BuildDescriptorLayouts(); - ConfigureRenderPipeline(); - ConfigurePresentPipeline(); -} - -VulkanState::~VulkanState() { - auto device = g_vk_instace->GetDevice(); - device.waitIdle(); - - // Destroy vertex shader - device.destroyShaderModule(render_vertex_shader); - device.destroyShaderModule(present_vertex_shader); - device.destroyShaderModule(present_fragment_shader); - - // Destroy pipeline layouts - device.destroyPipelineLayout(render_pipeline_layout); - device.destroyPipelineLayout(present_pipeline_layout); - - // Destroy descriptor layouts - for (auto& layout : descriptor_layouts) { - device.destroyDescriptorSetLayout(layout); - } - - // Destroy samplers - device.destroySampler(render_sampler); - device.destroySampler(present_sampler); - - // Destroy shaders - for (auto& shader : render_fragment_shaders) { - 
device.destroyShaderModule(shader.second); - } - - // Destroy pipelines - for (auto& pipeline : render_pipelines) { - device.destroyPipeline(pipeline.second); - } - - device.destroyPipeline(present_pipeline); -} - -void VulkanState::Create(const std::shared_ptr& swapchain) { - if (!s_vulkan_state) { - s_vulkan_state = std::make_unique(swapchain); - } -} - -VulkanState& VulkanState::Get() { - assert(s_vulkan_state); - return *s_vulkan_state; -} - -void VulkanState::SetVertexBuffer(const Buffer& buffer, vk::DeviceSize offset) { - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.bindVertexBuffers(0, buffer.GetBuffer(), offset); -} - -void VulkanState::SetUniformBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer) { - auto& set = descriptor_sets[0]; - updater.PushBufferUpdate(set, binding, - vk::DescriptorType::eUniformBuffer, - offset, size, buffer.GetBuffer()); - descriptors_dirty = true; -} - -void VulkanState::SetTexture(u32 binding, const Texture& image) { - auto& set = descriptor_sets[1]; - updater.PushCombinedImageSamplerUpdate(set, binding, render_sampler, image.GetView()); - render_views[binding] = image.GetView(); - descriptors_dirty = true; -} - -void VulkanState::SetTexelBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer, u32 view_index) { - auto& set = descriptor_sets[2]; - updater.PushBufferUpdate(set, binding, - vk::DescriptorType::eUniformTexelBuffer, - offset, size, buffer.GetBuffer(), - buffer.GetView(view_index)); - descriptors_dirty = true; -} - -void VulkanState::SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2) { - auto& set = descriptor_sets[3]; - - std::array views{view0, view1, view2}; - updater.PushTextureArrayUpdate(set, 0, present_sampler, views); - descriptors_dirty = true; -} - -void VulkanState::SetPresentData(DrawInfo data) { - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.pushConstants(present_pipeline_layout, 
vk::ShaderStageFlagBits::eFragment | - vk::ShaderStageFlagBits::eVertex, 0, sizeof(data), &data); - -} - -void VulkanState::SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha) { - std::array color{red, green, blue, alpha}; - placeholder.Upload(0, 0, 1, placeholder.GetArea(), color); -} - -void VulkanState::UnbindTexture(const Texture& image) { - for (int i = 0; i < 4; i++) { - if (render_views[i] == image.GetView()) { - render_views[i] = placeholder.GetView(); - updater.PushCombinedImageSamplerUpdate(descriptor_sets[1], i, - render_sampler, render_views[i]); - descriptors_dirty = true; - } - } - - if (present_view == image.GetView()) { - present_view = placeholder.GetView(); - updater.PushCombinedImageSamplerUpdate(descriptor_sets[3], 0, - render_sampler, present_view); - descriptors_dirty = true; - } -} - -void VulkanState::UnbindTexture(u32 unit) { - render_views[unit] = placeholder.GetView(); - updater.PushCombinedImageSamplerUpdate(descriptor_sets[1], unit, - render_sampler, render_views[unit]); - descriptors_dirty = true; -} - -void VulkanState::BeginRendering(Texture* color, Texture* depth, bool update_pipeline_formats, - vk::ClearColorValue color_clear, vk::AttachmentLoadOp color_load_op, - vk::AttachmentStoreOp color_store_op, vk::ClearDepthStencilValue depth_clear, - vk::AttachmentLoadOp depth_load_op, vk::AttachmentStoreOp depth_store_op, - vk::AttachmentLoadOp stencil_load_op, vk::AttachmentStoreOp stencil_store_op) { - // Make sure to exit previous render context - EndRendering(); - - // Make sure attachments are in optimal layout - vk::RenderingInfo render_info{{}, {}, 1, {}}; - std::array infos{}; - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - if (color != nullptr) { - color->Transition(cmdbuffer, vk::ImageLayout::eColorAttachmentOptimal); - - infos[0] = vk::RenderingAttachmentInfo{ - color->GetView(), color->GetLayout(), {}, {}, {}, - color_load_op, color_store_op, color_clear - }; - - render_info.colorAttachmentCount = 1; 
- render_info.pColorAttachments = &infos[0]; - render_info.renderArea = color->GetArea(); - } - - if (depth != nullptr) { - depth->Transition(cmdbuffer, vk::ImageLayout::eDepthStencilAttachmentOptimal); - - infos[1] = vk::RenderingAttachmentInfo{ - depth->GetView(), depth->GetLayout(), {}, {}, {}, - depth_load_op, depth_store_op, depth_clear - }; - - render_info.pDepthAttachment = &infos[1]; - - - if (IsStencil(depth->GetFormat())) { - infos[2] = vk::RenderingAttachmentInfo{ - depth->GetView(), depth->GetLayout(), {}, {}, {}, - stencil_load_op, stencil_store_op, depth_clear - }; - - render_info.pStencilAttachment = &infos[2]; - } - } - - if (update_pipeline_formats) { - render_pipeline_key.color = color != nullptr ? - color->GetFormat() : - vk::Format::eUndefined; - render_pipeline_key.depth_stencil = depth != nullptr ? - depth->GetFormat() : - vk::Format::eUndefined; - } - - // Begin rendering - cmdbuffer.beginRendering(render_info); - rendering = true; -} - -void VulkanState::EndRendering() { - if (!rendering) { - return; - } - - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.endRendering(); - rendering = false; -} - -void VulkanState::SetViewport(vk::Viewport new_viewport) { - if (new_viewport != viewport) { - viewport = new_viewport; - dirty_flags.set(DynamicStateFlags::Viewport); - } -} - -void VulkanState::SetScissor(vk::Rect2D new_scissor) { - if (new_scissor != scissor) { - scissor = new_scissor; - dirty_flags.set(DynamicStateFlags::Scissor); - } -} - -void VulkanState::SetCullMode(vk::CullModeFlags flags) { - if (cull_mode != flags) { - cull_mode = flags; - dirty_flags.set(DynamicStateFlags::CullMode); - } -} - -void VulkanState::SetFrontFace(vk::FrontFace face) { - if (front_face != face) { - front_face = face; - dirty_flags.set(DynamicStateFlags::FrontFace); - } -} - -void VulkanState::SetColorMask(vk::ColorComponentFlags mask) { - render_pipeline_key.blend_config.colorWriteMask = mask; -} - -void 
VulkanState::SetLogicOp(vk::LogicOp logic_op) { - render_pipeline_key.blend_logic_op = logic_op; -} - -void VulkanState::SetBlendEnable(bool enable) { - render_pipeline_key.blend_config.blendEnable = enable; -} - -void VulkanState::SetBlendCostants(float red, float green, float blue, float alpha) { - std::array color{red, green, blue, alpha}; - if (color != blend_constants) { - blend_constants = color; - dirty_flags.set(DynamicStateFlags::BlendConstants); - } -} - -void VulkanState::SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, - vk::BlendFactor dst_color, vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha) { - auto& blend = render_pipeline_key.blend_config; - blend.colorBlendOp = rgb_op; - blend.alphaBlendOp = alpha_op; - blend.srcColorBlendFactor = src_color; - blend.dstColorBlendFactor = dst_color; - blend.srcAlphaBlendFactor = src_alpha; - blend.dstAlphaBlendFactor = dst_alpha; -} - -void VulkanState::SetStencilWrite(u32 mask) { - if (mask != stencil_write_mask) { - stencil_write_mask = mask; - dirty_flags.set(DynamicStateFlags::StencilMask); - } -} - -void VulkanState::SetStencilInput(u32 mask) { - if (mask != stencil_input_mask) { - stencil_input_mask = mask; - dirty_flags.set(DynamicStateFlags::StencilMask); - } -} - -void VulkanState::SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, - vk::CompareOp compare, u32 ref) { - stencil_enabled = enable; - stencil_ref = ref; - fail_op = fail; - pass_op = pass; - depth_fail_op = depth_fail; - stencil_op = compare; - dirty_flags.set(DynamicStateFlags::StencilTest); -} - -void VulkanState::SetDepthWrite(bool enable) { - if (enable != depth_writes) { - depth_writes = enable; - dirty_flags.set(DynamicStateFlags::DepthWrite); - } -} - -void VulkanState::SetDepthTest(bool enable, vk::CompareOp compare) { - depth_enabled = enable; - depth_op = compare; - dirty_flags.set(DynamicStateFlags::DepthTest); -} - - -void 
VulkanState::InitDescriptorSets() { - auto pool = g_vk_task_scheduler->GetDescriptorPool(); - auto device = g_vk_instace->GetDevice(); - - // Allocate new sets - vk::DescriptorSetAllocateInfo allocate_info{pool, descriptor_layouts}; - auto sets = device.allocateDescriptorSets(allocate_info); - - // Update them if the previous sets are valid - u32 copy_count = 0; - std::array copies; - - // Copy only valid descriptors - std::array binding_count{2, 4, 3, 1}; - for (int i = 0; i < descriptor_sets.size(); i++) { - if (descriptor_sets[i]) { - for (u32 binding = 0; binding < binding_count[i]; binding++) { - copies[copy_count++] = {descriptor_sets[i], binding, 0, sets[i], binding, 0, 1}; - } - } - } - - if (copy_count < 10) { - // Some descriptors weren't copied and thus need manual updating - descriptors_dirty = true; - } - - device.updateDescriptorSets(0, nullptr, copy_count, copies.data()); - std::copy_n(sets.begin(), descriptor_sets.size(), descriptor_sets.begin()); -} - -void VulkanState::ApplyRenderState(const Pica::Regs& regs) { - // Update any pending texture units - if (descriptors_dirty) { - updater.Update(); - descriptors_dirty = false; - } - - // Bind an appropriate render pipeline - render_pipeline_key.fragment_config = PicaFSConfig::BuildFromRegs(regs); - auto result = render_pipelines.find(render_pipeline_key); - - // Try to use an already complete pipeline - vk::Pipeline pipeline; - if (result != render_pipelines.end()) { - pipeline = result->second; - } - else { - // Maybe the shader has been compiled but the pipeline state changed? 
- auto shader = render_fragment_shaders.find(render_pipeline_key.fragment_config); - if (shader != render_fragment_shaders.end()) { - render_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, shader->second); - } - else { - // Re-compile shader module and create new pipeline - auto code = GenerateFragmentShader(render_pipeline_key.fragment_config); - auto module = CompileShader(code, vk::ShaderStageFlagBits::eFragment); - render_fragment_shaders.emplace(render_pipeline_key.fragment_config, module); - render_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, module); - } - - // Update pipeline builder - auto& att = render_pipeline_key.blend_config; - render_pipeline_builder.SetRenderingFormats(render_pipeline_key.color, render_pipeline_key.depth_stencil); - render_pipeline_builder.SetBlendLogicOp(render_pipeline_key.blend_logic_op); - render_pipeline_builder.SetBlendAttachment(att.blendEnable, att.srcColorBlendFactor, att.dstColorBlendFactor, - att.colorBlendOp, att.srcAlphaBlendFactor, att.dstAlphaBlendFactor, - att.alphaBlendOp, att.colorWriteMask); - // Cache the resulted pipeline - pipeline = render_pipeline_builder.Build(); - render_pipelines.emplace(render_pipeline_key, pipeline); - } - - // Bind the render pipeline - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); - - // Force set all dynamic state for new pipeline - dirty_flags.set(); - - ApplyCommonState(true); - - // Bind render descriptor sets - if (descriptor_sets[1]) { - cmdbuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, render_pipeline_layout, - 0, 3, descriptor_sets.data(), 0, nullptr); - return; - } - - LOG_CRITICAL(Render_Vulkan, "Texture unit descriptor set unallocated!"); - UNREACHABLE(); -} - -void VulkanState::ApplyPresentState() { - // Update present texture if it was reallocated by the renderer - if (descriptors_dirty) { - updater.Update(); - descriptors_dirty = 
false; - } - - // Bind present pipeline and descriptors - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - cmdbuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipeline); - - ApplyCommonState(false); - - if (descriptor_sets[3]) { - cmdbuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, - 0, 1, &descriptor_sets[3], 0, nullptr); - return; - } - - LOG_CRITICAL(Render_Vulkan, "Present descriptor set unallocated!"); - UNREACHABLE(); -} - -void VulkanState::ApplyCommonState(bool extended) { - // Re-apply dynamic parts of the pipeline - auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - if (dirty_flags.test(DynamicStateFlags::Viewport)) { - cmdbuffer.setViewport(0, viewport); - } - - if (dirty_flags.test(DynamicStateFlags::Scissor)) { - cmdbuffer.setScissor(0, scissor); - } - - if (dirty_flags.test(DynamicStateFlags::DepthTest) && extended) { - cmdbuffer.setDepthTestEnable(depth_enabled); - cmdbuffer.setDepthCompareOp(depth_op); - } - - if (dirty_flags.test(DynamicStateFlags::StencilTest) && extended) { - cmdbuffer.setStencilTestEnable(stencil_enabled); - cmdbuffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, stencil_ref); - cmdbuffer.setStencilOp(vk::StencilFaceFlagBits::eFrontAndBack, fail_op, pass_op, - depth_fail_op, stencil_op); - } - - if (dirty_flags.test(DynamicStateFlags::CullMode) && extended) { - cmdbuffer.setCullMode(cull_mode); - } - - if (dirty_flags.test(DynamicStateFlags::FrontFace) && extended) { - cmdbuffer.setFrontFace(front_face); - } - - if (dirty_flags.test(DynamicStateFlags::BlendConstants) && extended) { - cmdbuffer.setBlendConstants(blend_constants.data()); - } - - if (dirty_flags.test(DynamicStateFlags::StencilMask) && extended) { - cmdbuffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, stencil_write_mask); - cmdbuffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, stencil_input_mask); - } - - if 
(dirty_flags.test(DynamicStateFlags::DepthWrite) && extended) { - cmdbuffer.setDepthWriteEnable(depth_writes); - } - - dirty_flags.reset(); -} - -void VulkanState::BuildDescriptorLayouts() { - // Render descriptor layouts - std::array ubo_set{{ - {0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | vk::ShaderStageFlagBits::eFragment}, // shader_data - {1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex} // pica_uniforms - }}; - std::array texture_set{{ - {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex0 - {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex1 - {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex2 - {3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex_cube - }}; - std::array lut_set{{ - {0, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, // texture_buffer_lut_lf - {1, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, // texture_buffer_lut_rg - {2, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment} // texture_buffer_lut_rgba - }}; - std::array present_set{{ - {0, vk::DescriptorType::eCombinedImageSampler, 3, vk::ShaderStageFlagBits::eFragment} - }}; - - std::array create_infos{{ - { {}, ubo_set }, { {}, texture_set }, { {}, lut_set }, { {}, present_set } - }}; - - // Create the descriptor set layouts - auto device = g_vk_instace->GetDevice(); - for (int i = 0; i < DESCRIPTOR_SET_COUNT; i++) { - descriptor_layouts[i] = device.createDescriptorSetLayout(create_infos[i]); - } -} - -void VulkanState::ConfigureRenderPipeline() { - // Make render pipeline layout - PipelineLayoutBuilder lbuilder; - lbuilder.AddDescriptorSet(descriptor_layouts[0]); - lbuilder.AddDescriptorSet(descriptor_layouts[1]); - 
lbuilder.AddDescriptorSet(descriptor_layouts[2]); - render_pipeline_layout = lbuilder.Build(); - - // Set rasterization state - render_pipeline_builder.Clear(); - render_pipeline_builder.SetPipelineLayout(render_pipeline_layout); - render_pipeline_builder.SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList); - render_pipeline_builder.SetLineWidth(1.0f); - render_pipeline_builder.SetNoCullRasterizationState(); - render_pipeline_builder.SetRenderingFormats(render_pipeline_key.color, render_pipeline_key.depth_stencil); - - // Set depth, stencil tests and blending - render_pipeline_builder.SetNoDepthTestState(); - render_pipeline_builder.SetNoStencilState(); - render_pipeline_builder.SetBlendConstants(1.f, 1.f, 1.f, 1.f); - render_pipeline_builder.SetBlendAttachment(true, vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, - vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); - - // Enable every required dynamic state - std::array dynamic_states{ - vk::DynamicState::eDepthCompareOp, - vk::DynamicState::eDepthTestEnable, vk::DynamicState::eStencilTestEnable, - vk::DynamicState::eStencilOp, - vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, - vk::DynamicState::eStencilReference, vk::DynamicState::eDepthWriteEnable, - vk::DynamicState::eCullMode, vk::DynamicState::eBlendConstants, - vk::DynamicState::eViewport, vk::DynamicState::eScissor, - vk::DynamicState::eFrontFace - }; - - render_pipeline_builder.SetDynamicStates(dynamic_states); - - // Configure vertex buffer - auto attributes = HardwareVertex::attribute_desc; - render_pipeline_builder.AddVertexBuffer(0, sizeof(HardwareVertex), vk::VertexInputRate::eVertex, attributes); - - // Add trivial vertex shader - auto code = GenerateTrivialVertexShader(true); - render_vertex_shader = CompileShader(code, 
vk::ShaderStageFlagBits::eVertex); - render_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eVertex, render_vertex_shader); -} - -void VulkanState::ConfigurePresentPipeline() { - // Make present pipeline layout - PipelineLayoutBuilder lbuilder; - lbuilder.AddDescriptorSet(descriptor_layouts[3]); - lbuilder.AddPushConstants(vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, 0, sizeof(DrawInfo)); - present_pipeline_layout = lbuilder.Build(); - - // Set rasterization state - present_pipeline_builder.Clear(); - present_pipeline_builder.SetPipelineLayout(present_pipeline_layout); - present_pipeline_builder.SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleStrip); - present_pipeline_builder.SetLineWidth(1.0f); - present_pipeline_builder.SetNoCullRasterizationState(); - present_pipeline_builder.SetRenderingFormats(vk::Format::eB8G8R8A8Unorm); - - // Set depth, stencil tests and blending - present_pipeline_builder.SetNoDepthTestState(); - present_pipeline_builder.SetNoStencilState(); - present_pipeline_builder.SetNoBlendingState(); - - // Enable every required dynamic state - std::array dynamic_states{ - vk::DynamicState::eViewport, - vk::DynamicState::eScissor, - }; - - present_pipeline_builder.SetDynamicStates(dynamic_states); - - // Configure vertex buffer - auto attributes = ScreenRectVertex::attribute_desc; - present_pipeline_builder.AddVertexBuffer(0, sizeof(ScreenRectVertex), vk::VertexInputRate::eVertex, attributes); - - // Configure shader stages - auto vertex_code = GetPresentVertexShader(); - present_vertex_shader = CompileShader(vertex_code, vk::ShaderStageFlagBits::eVertex); - present_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eVertex, present_vertex_shader); - - auto fragment_code = GetPresentFragmentShader(); - present_fragment_shader = CompileShader(fragment_code, vk::ShaderStageFlagBits::eFragment); - present_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, present_fragment_shader); - - 
present_pipeline = present_pipeline_builder.Build(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h deleted file mode 100644 index 7f82cc614..000000000 --- a/src/video_core/renderer_vulkan/vk_state.h +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include "video_core/regs.h" -#include "video_core/renderer_vulkan/vk_buffer.h" -#include "video_core/renderer_vulkan/vk_shader_state.h" -#include "video_core/renderer_vulkan/vk_pipeline_builder.h" - -namespace Vulkan { - -constexpr u32 DESCRIPTOR_SET_COUNT = 4; - -struct DrawInfo { - glm::mat4 modelview; - glm::vec4 i_resolution; - glm::vec4 o_resolution; - int layer; -}; - -class DescriptorUpdater { -public: - DescriptorUpdater() { Reset(); } - ~DescriptorUpdater() = default; - - void Reset(); - void Update(); - - void PushTextureArrayUpdate(vk::DescriptorSet, u32 biding, vk::Sampler sampler, - std::span views); - void PushCombinedImageSamplerUpdate(vk::DescriptorSet set, u32 binding, - vk::Sampler sampler, vk::ImageView view); - void PushBufferUpdate(vk::DescriptorSet set, u32 binding, - vk::DescriptorType buffer_type, u32 offset, u32 size, - vk::Buffer buffer, const vk::BufferView& view = VK_NULL_HANDLE); - -private: - static constexpr u32 MAX_DESCRIPTORS = 10; - static constexpr u32 MAX_UPDATES = 20; - struct Descriptor { - vk::DescriptorImageInfo image_info; - vk::DescriptorBufferInfo buffer_info; - }; - - std::array writes; - std::array image_infos; - std::array buffer_infos; - u32 image_count{0}, buffer_count{0}, write_count{0}; -}; - -class Swapchain; - -/// Tracks global Vulkan state -class VulkanState { -public: - VulkanState(const std::shared_ptr& swapchain); - ~VulkanState(); - - /// Initialize object to its initial state - static void Create(const std::shared_ptr& swapchain); - 
static VulkanState& Get(); - - /// Query state - bool DepthTestEnabled() const { return depth_enabled && depth_writes; } - bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; } - - /// Configure drawing state - void SetVertexBuffer(const StreamBuffer& buffer, vk::DeviceSize offset); - void SetViewport(vk::Viewport viewport); - void SetScissor(vk::Rect2D scissor); - void SetCullMode(vk::CullModeFlags flags); - void SetFrontFace(vk::FrontFace face); - void SetLogicOp(vk::LogicOp logic_op); - void SetStencilWrite(u32 mask); - void SetStencilInput(u32 mask); - void SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, - vk::CompareOp compare, u32 ref); - void SetDepthWrite(bool enable); - void SetDepthTest(bool enable, vk::CompareOp compare); - void SetColorMask(vk::ColorComponentFlags mask); - void SetBlendEnable(bool enable); - void SetBlendCostants(float red, float green, float blue, float alpha); - void SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, vk::BlendFactor dst_color, - vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha); - - /// Rendering - void BeginRendering(Texture* color, Texture* depth, bool update_pipeline_formats = false, - vk::ClearColorValue color_clear = {}, - vk::AttachmentLoadOp color_load_op = vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp color_store_op = vk::AttachmentStoreOp::eStore, - vk::ClearDepthStencilValue depth_clear = {}, - vk::AttachmentLoadOp depth_load_op = vk::AttachmentLoadOp::eLoad, - vk::AttachmentStoreOp depth_store_op = vk::AttachmentStoreOp::eStore, - vk::AttachmentLoadOp stencil_load_op = vk::AttachmentLoadOp::eDontCare, - vk::AttachmentStoreOp stencil_store_op = vk::AttachmentStoreOp::eDontCare); - void EndRendering(); - - /// Configure shader resources - void SetUniformBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer); - void SetTexture(u32 binding, const Texture& texture); - void 
SetTexelBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer, u32 view_index); - void SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2); - void SetPresentData(DrawInfo data); - void SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha); - void UnbindTexture(const Texture& image); - void UnbindTexture(u32 unit); - - /// Apply all dirty state to the current Vulkan command buffer - void InitDescriptorSets(); - void ApplyRenderState(const Pica::Regs& config); - void ApplyPresentState(); - void ApplyCommonState(bool extended); - -private: - void BuildDescriptorLayouts(); - void ConfigureRenderPipeline(); - void ConfigurePresentPipeline(); - -private: - // Render targets - std::shared_ptr swapchain; - bool rendering{false}; - vk::ImageView present_view; - std::array render_views; - vk::Sampler render_sampler, present_sampler; - Texture placeholder; - - // Render state - bool descriptors_dirty{}; - DescriptorUpdater updater; - std::array descriptor_layouts; - std::array descriptor_sets; - - // Pipeline caches - PipelineCacheKey render_pipeline_key{}; - PipelineBuilder render_pipeline_builder, present_pipeline_builder; - vk::PipelineLayout render_pipeline_layout, present_pipeline_layout; - std::unordered_map render_pipelines; - vk::Pipeline present_pipeline; - - // Shader caches - vk::ShaderModule render_vertex_shader, present_vertex_shader, present_fragment_shader; - std::unordered_map render_fragment_shaders; - - // Dynamic state - enum DynamicStateFlags : u32 { - Viewport, - Scissor, - LineWidth, - DepthTest, - DepthWrite, - StencilTest, - StencilMask, - ColorWrite, - CullMode, - BlendConstants, - FrontFace, - }; - - std::bitset<16> dirty_flags; - u32 stencil_write_mask{}, stencil_input_mask{}, stencil_ref{}; - bool depth_enabled{}, depth_writes{}, stencil_enabled{}, stencil_writes{}; - vk::StencilOp fail_op, pass_op, depth_fail_op; - vk::CompareOp depth_op, stencil_op; - - vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 
0.0f, 1.0f}; - vk::CullModeFlags cull_mode{}; - vk::FrontFace front_face{}; - vk::Rect2D scissor{}; - vk::LogicOp logic_op{}; - std::array blend_constants{}; -}; - -extern std::unique_ptr s_vulkan_state; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_surface_params.cpp b/src/video_core/renderer_vulkan/vk_surface_params.cpp deleted file mode 100644 index a2c297c9a..000000000 --- a/src/video_core/renderer_vulkan/vk_surface_params.cpp +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/alignment.h" -#include "video_core/renderer_vulkan/vk_rasterizer_cache.h" -#include "video_core/renderer_vulkan/vk_surface_params.h" - -namespace Vulkan { - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { - SurfaceParams params = *this; - const u32 tiled_size = is_tiled ? 8 : 1; - const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - PAddr aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - PAddr aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - - if (aligned_end - aligned_start > stride_tiled_bytes) { - params.addr = aligned_start; - params.height = (aligned_end - aligned_start) / BytesInPixels(stride); - } else { - // 1 row - ASSERT(aligned_end - aligned_start == stride_tiled_bytes); - const u32 tiled_alignment = BytesInPixels(is_tiled ? 
8 * 8 : 1); - aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); - aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); - params.addr = aligned_start; - params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size; - params.stride = params.width; - params.height = tiled_size; - } - params.UpdateParams(); - - return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle unscaled_rect) const { - if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { - return {}; - } - - if (is_tiled) { - unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; - unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; - unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; - unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; - } - - const u32 stride_tiled = !is_tiled ? stride : stride * 8; - - const u32 pixel_offset = - stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + - unscaled_rect.left; - - const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - - return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 
8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - -Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { - const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); - - if (is_tiled) { - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - const int y0 = (begin_pixel_index / (stride * 8)) * 8; - // Top to bottom - return Common::Rectangle(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); - } - - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - // Bottom to top - return Common::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -Common::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { - auto rect = GetSubRect(sub_surface); - rect.left = rect.left * res_scale; - rect.right = rect.right * res_scale; - rect.top = rect.top * res_scale; - 
rect.bottom = rect.bottom * res_scale; - return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == - std::tie(addr, width, height, stride, pixel_format, is_tiled) && - pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).right <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { - if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end) { - return false; - } - if (texcopy_params.width != texcopy_params.stride) { - const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1)); - return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 && - (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && - ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } - return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_surface_params.h b/src/video_core/renderer_vulkan/vk_surface_params.h deleted file mode 100644 index 0e60de77e..000000000 --- a/src/video_core/renderer_vulkan/vk_surface_params.h +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "common/assert.h" -#include "common/math_util.h" -#include "core/hw/gpu.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/regs_texturing.h" - -namespace Vulkan { - -struct CachedSurface; -using Surface = std::shared_ptr; - -using SurfaceInterval = boost::icl::right_open_interval; - -struct SurfaceParams { -private: - static constexpr std::array BPP_TABLE = { - 32, // RGBA8 - 24, // RGB8 - 16, // RGB5A1 - 16, // RGB565 - 16, // RGBA4 - 16, // IA8 - 16, // RG8 - 8, // I8 - 8, // A8 - 8, // IA4 - 4, // I4 - 4, // A4 - 4, // ETC1 - 8, // ETC1A4 - 16, // D16 - 0, - 24, // D24 - 32, // D24S8 - }; - -public: - enum class PixelFormat { - // First 5 formats are shared between textures and color buffers - RGBA8 = 0, - RGB8 = 1, - RGB5A1 = 2, - RGB565 = 3, - RGBA4 = 4, - - // Texture-only formats - IA8 = 5, - RG8 = 6, - I8 = 7, - A8 = 8, - IA4 = 9, - I4 = 10, - A4 = 11, - ETC1 = 12, - ETC1A4 = 13, - - // Depth buffer-only formats - D16 = 14, - // gap - D24 = 16, - D24S8 = 17, - - Invalid = 255, - }; - - enum class SurfaceType { - Color = 0, - Texture = 1, - Depth = 2, - DepthStencil = 3, - Fill = 4, - Invalid = 5 - }; - - static constexpr unsigned int GetFormatBpp(PixelFormat format) { - const auto format_idx = 
static_cast(format); - DEBUG_ASSERT_MSG(format_idx < BPP_TABLE.size(), "Invalid pixel format {}", format_idx); - return BPP_TABLE[format_idx]; - } - - unsigned int GetFormatBpp() const { - return GetFormatBpp(pixel_format); - } - - static std::string_view PixelFormatAsString(PixelFormat format) { - switch (format) { - case PixelFormat::RGBA8: - return "RGBA8"; - case PixelFormat::RGB8: - return "RGB8"; - case PixelFormat::RGB5A1: - return "RGB5A1"; - case PixelFormat::RGB565: - return "RGB565"; - case PixelFormat::RGBA4: - return "RGBA4"; - case PixelFormat::IA8: - return "IA8"; - case PixelFormat::RG8: - return "RG8"; - case PixelFormat::I8: - return "I8"; - case PixelFormat::A8: - return "A8"; - case PixelFormat::IA4: - return "IA4"; - case PixelFormat::I4: - return "I4"; - case PixelFormat::A4: - return "A4"; - case PixelFormat::ETC1: - return "ETC1"; - case PixelFormat::ETC1A4: - return "ETC1A4"; - case PixelFormat::D16: - return "D16"; - case PixelFormat::D24: - return "D24"; - case PixelFormat::D24S8: - return "D24S8"; - default: - return "Not a real pixel format"; - } - } - - static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { - return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid; - } - - static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) { - return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; - } - - static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) { - return ((unsigned int)format < 4) ? 
(PixelFormat)((unsigned int)format + 14) - : PixelFormat::Invalid; - } - - static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { - switch (format) { - // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat - case GPU::Regs::PixelFormat::RGB565: - return PixelFormat::RGB565; - case GPU::Regs::PixelFormat::RGB5A1: - return PixelFormat::RGB5A1; - default: - return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; - } - } - - static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { - SurfaceType a_type = GetFormatType(pixel_format_a); - SurfaceType b_type = GetFormatType(pixel_format_b); - - if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && - (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { - return true; - } - - if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { - return true; - } - - if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { - return true; - } - - return false; - } - - static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { - if ((unsigned int)pixel_format < 5) { - return SurfaceType::Color; - } - - if ((unsigned int)pixel_format < 14) { - return SurfaceType::Texture; - } - - if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { - return SurfaceType::Depth; - } - - if (pixel_format == PixelFormat::D24S8) { - return SurfaceType::DepthStencil; - } - - return SurfaceType::Invalid; - } - - /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" - /// and "pixel_format" - void UpdateParams() { - if (stride == 0) { - stride = width; - } - type = GetFormatType(pixel_format); - size = !is_tiled ? 
BytesInPixels(stride * (height - 1) + width) - : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); - end = addr + size; - } - - SurfaceInterval GetInterval() const { - return SurfaceInterval(addr, end); - } - - // Returns the outer rectangle containing "interval" - SurfaceParams FromInterval(SurfaceInterval interval) const; - - SurfaceInterval GetSubRectInterval(Common::Rectangle unscaled_rect) const; - - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - - u32 GetScaledWidth() const { - return width * res_scale; - } - - u32 GetScaledHeight() const { - return height * res_scale; - } - - Common::Rectangle GetRect() const { - return {0, height, width, 0}; - } - - Common::Rectangle GetScaledRect() const { - return {0, GetScaledHeight(), GetScaledWidth(), 0}; - } - - u32 PixelsInBytes(u32 size) const { - return size * CHAR_BIT / GetFormatBpp(pixel_format); - } - - u32 BytesInPixels(u32 pixels) const { - return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; - } - - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - Common::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; - Common::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - - PAddr addr = 0; - PAddr end = 0; - u32 size = 0; - - u32 width = 0; - u32 height = 0; - u32 stride = 0; - u16 res_scale = 1; - - bool is_tiled = false; - PixelFormat pixel_format = PixelFormat::Invalid; - SurfaceType type = SurfaceType::Invalid; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp index 57a7d29f7..6d3a1a608 100644 --- a/src/video_core/renderer_vulkan/vk_texture.cpp +++ b/src/video_core/renderer_vulkan/vk_texture.cpp @@ 
-147,8 +147,8 @@ Texture::~Texture() { // Schedule deletion of the texture after it's no longer used by the GPU scheduler.Schedule(deleter); } else if (!is_texture_owned) { - // If the texture is not owning, destroy the view immediately as - // synchronization is the caller's responsibility + // If the texture is not owning, destroy the view immediately. + // Synchronization is the caller's responsibility vk::Device device = instance.GetDevice(); device.destroyImageView(image_view); } @@ -261,10 +261,9 @@ void Texture::Upload(Rect2D rectangle, u32 stride, std::span data, u32 // If the adverised format supports blitting then use GPU accelerated // format conversion. if (internal_format != advertised_format && - instance.IsFormatSupported(advertised_format, - vk::FormatFeatureFlagBits::eBlitSrc)) { + instance.IsFormatSupported(advertised_format, vk::FormatFeatureFlagBits::eBlitSrc)) { // Creating a new staging texture for each upload/download is expensive - // but this path is not common. TODO: Profile this + // but this path should not be common. 
TODO: Profile this StagingTexture staging{instance, scheduler, info}; const std::array offsets = { @@ -274,9 +273,19 @@ void Texture::Upload(Rect2D rectangle, u32 stride, std::span data, u32 }; const vk::ImageBlit image_blit = { - .srcSubresource = {aspect, level, 0, 1}, + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, .srcOffsets = offsets, - .dstSubresource = {aspect, level, 0, 1}, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, .dstOffsets = offsets }; @@ -301,7 +310,7 @@ void Texture::Upload(Rect2D rectangle, u32 stride, std::span data, u32 std::memcpy(slice.data(), data.data(), byte_count); staging.Commit(byte_count); - // TODO: Handle depth and stencil uploads + // TODO: Handle format conversions and depth/stencil uploads ASSERT(aspect == vk::ImageAspectFlagBits::eColor && advertised_format == internal_format); @@ -338,10 +347,9 @@ void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 lev // If the adverised format supports blitting then use GPU accelerated // format conversion. if (internal_format != advertised_format && - instance.IsFormatSupported(advertised_format, - vk::FormatFeatureFlagBits::eBlitDst)) { + instance.IsFormatSupported(advertised_format, vk::FormatFeatureFlagBits::eBlitDst)) { // Creating a new staging texture for each upload/download is expensive - // but this path is not common. TODO: Profile this + // but this path should not be common. 
TODO: Profile this StagingTexture staging{instance, scheduler, info}; const std::array offsets = { @@ -351,9 +359,19 @@ void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 lev }; const vk::ImageBlit image_blit = { - .srcSubresource = {aspect, level, 0, 1}, + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, .srcOffsets = offsets, - .dstSubresource = {aspect, level, 0, 1}, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1 + }, .dstOffsets = offsets }; @@ -376,6 +394,10 @@ void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 lev Buffer& staging = scheduler.GetCommandUploadBuffer(); const u64 staging_offset = staging.GetCurrentOffset(); + // TODO: Handle format conversions and depth/stencil downloads + ASSERT(aspect == vk::ImageAspectFlagBits::eColor && + advertised_format == internal_format); + const vk::BufferImageCopy copy_region = { .bufferOffset = staging_offset, .bufferRowLength = stride, @@ -409,6 +431,7 @@ void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 lev void Texture::BlitTo(TextureHandle dest, Rect2D source_rect, Rect2D dest_rect, u32 src_level, u32 dest_level, u32 src_layer, u32 dest_layer) { + Texture* dest_texture = static_cast(dest.Get()); // Prepare images for transfer @@ -449,7 +472,7 @@ void Texture::BlitTo(TextureHandle dest, Rect2D source_rect, Rect2D dest_rect, u dest_texture->GetHandle(), vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); - // Revert changes to the layout + // Prepare for shader reads Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); dest_texture->Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); } @@ -501,6 +524,42 @@ void Texture::GenerateMipmaps() { Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal, 0, info.levels); } +void Texture::CopyFrom(TextureHandle source) { + 
const vk::ImageCopy image_copy = { + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffset = {0, 0, 0}, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffset = {0, 0, 0}, + .extent = {source->GetWidth(), source->GetHeight(), 1} + }; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Texture* texture = static_cast(source.Get()); + + // Transition images + vk::ImageLayout old_layout = texture->GetLayout(); + texture->Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal); + Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal); + + // Perform copy + command_buffer.copyImage(texture->GetHandle(), vk::ImageLayout::eTransferSrcOptimal, + image, vk::ImageLayout::eTransferDstOptimal, image_copy); + + // We need to preserve the old texture layout + texture->Transition(command_buffer, old_layout); + Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); +} + StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler, const TextureInfo& info) : TextureBase(info), instance(instance), scheduler(scheduler) { @@ -538,9 +597,8 @@ StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler, // Map memory mapped_ptr = alloc_info.pMappedData; - // Transition image to VK_IMAGE_LAYOUT_GENERAL. 
This layout is convenient - // for staging textures since it allows for well defined host access and - // works with vkCmdBlitImage, thus eliminating the need for layout transitions + // For staging textures the most convenient layout is VK_IMAGE_LAYOUT_GENERAL because it allows + // for well defined host access and works with vkCmdBlitImage, thus eliminating the need for layout transitions const vk::ImageMemoryBarrier barrier = { .srcAccessMask = vk::AccessFlagBits::eNone, .dstAccessMask = vk::AccessFlagBits::eNone, diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h index 149a40a29..7b2e7bf81 100644 --- a/src/video_core/renderer_vulkan/vk_texture.h +++ b/src/video_core/renderer_vulkan/vk_texture.h @@ -38,6 +38,8 @@ public: void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect, u32 src_level = 0, u32 dest_level = 0, u32 src_layer = 0, u32 dest_layer = 0) override; + void CopyFrom(TextureHandle source) override; + void GenerateMipmaps() override; /// Overrides the layout of provided image subresource