From 84f97fc77eae02ba599f84b1ffddb2644dcecddb Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Thu, 28 Apr 2022 18:14:13 +0300
Subject: [PATCH] renderer_vulkan: Implement basic utility classes

* These first few commits are mostly code dumps until the renderer interface is finalized. This commit adds some basic Vulkan objects like textures, buffers and samplers, and translates some of the simpler OpenGL code to Vulkan.

* In addition, a simple resource cache is implemented that handles recycling of samplers and renderpasses, and also stores the constant descriptor layout infos. I've added all the resources I could deduce being used in shaders, though I might be missing something. I'm still divided on whether the resource cache should also handle pipelines.

---
 src/common/common_types.h | 3 +
 src/video_core/CMakeLists.txt | 19 +-
 src/video_core/renderer_vulkan/vk_buffer.cpp | 97 +-
 src/video_core/renderer_vulkan/vk_buffer.h | 70 +-
 src/video_core/renderer_vulkan/vk_context.h | 75 -
 .../{vk_context.cpp => vk_instance.cpp} | 0
 src/video_core/renderer_vulkan/vk_instance.h | 72 +
 .../renderer_vulkan/vk_pipeline.cpp | 61 +
 src/video_core/renderer_vulkan/vk_pipeline.h | 49 +
 .../renderer_vulkan/vk_pipeline_manager.cpp | 668 +++++
 .../renderer_vulkan/vk_pipeline_manager.h | 131 +
 .../renderer_vulkan/vk_rasterizer.cpp | 2252 +++++++++++++++++
 .../renderer_vulkan/vk_rasterizer.h | 338 +++
 .../renderer_vulkan/vk_rasterizer_cache.cpp | 1875 ++++++++++++++
 .../renderer_vulkan/vk_rasterizer_cache.h | 371 +++
 .../renderer_vulkan/vk_resource_cache.cpp | 164 ++
 .../renderer_vulkan/vk_resource_cache.h | 58 +
 .../renderer_vulkan/vk_resource_manager.cpp | 246 --
 .../renderer_vulkan/vk_resource_manager.h | 243 --
 .../renderer_vulkan/vk_shader_state.h | 235 ++
 src/video_core/renderer_vulkan/vk_state.h | 23 +-
 .../renderer_vulkan/vk_surface_params.cpp | 171 ++
 .../renderer_vulkan/vk_surface_params.h | 270 ++
 src/video_core/renderer_vulkan/vk_swapchain.h | 1 -
 src/video_core/renderer_vulkan/vk_texture.cpp | 122 +-
 src/video_core/renderer_vulkan/vk_texture.h | 71 +-
 26 files changed, 6925 insertions(+), 760 deletions(-)
 delete mode 100644 src/video_core/renderer_vulkan/vk_context.h
 rename src/video_core/renderer_vulkan/{vk_context.cpp => vk_instance.cpp} (100%)
 create mode 100644 src/video_core/renderer_vulkan/vk_instance.h
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_manager.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_pipeline_manager.h
 create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.h
 create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer_cache.h
 create mode 100644 src/video_core/renderer_vulkan/vk_resource_cache.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_resource_cache.h
 delete mode 100644 src/video_core/renderer_vulkan/vk_resource_manager.cpp
 delete mode 100644 src/video_core/renderer_vulkan/vk_resource_manager.h
 create mode 100644 src/video_core/renderer_vulkan/vk_shader_state.h
 create mode 100644 src/video_core/renderer_vulkan/vk_surface_params.cpp
 create mode 100644 src/video_core/renderer_vulkan/vk_surface_params.h

diff --git a/src/common/common_types.h b/src/common/common_types.h index ee18eac81..2d723ec5a 100644 --- a/src/common/common_types.h +++ b/src/common/common_types.h @@ -56,6
+56,9 @@ protected: constexpr NonCopyable() = default; ~NonCopyable() = default; + // Enable std::move operations + NonCopyable(NonCopyable&&) = default; + NonCopyable(const NonCopyable&) = delete; NonCopyable& operator=(const NonCopyable&) = delete; }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d5796f468..fb808b1ee 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -73,12 +73,23 @@ add_library(video_core STATIC renderer_vulkan/renderer_vulkan.h renderer_vulkan/vk_buffer.cpp renderer_vulkan/vk_buffer.h - renderer_vulkan/vk_context.cpp - renderer_vulkan/vk_context.h - renderer_vulkan/vk_resource_manager.cpp - renderer_vulkan/vk_resource_manager.h + renderer_vulkan/vk_instance.cpp + renderer_vulkan/vk_instance.h + renderer_vulkan/vk_resource_cache.cpp + renderer_vulkan/vk_resource_cache.h + renderer_vulkan/vk_rasterizer_cache.cpp + renderer_vulkan/vk_rasterizer_cache.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_pipeline.cpp + renderer_vulkan/vk_pipeline.h + renderer_vulkan/vk_pipeline_manager.h + renderer_vulkan/vk_pipeline_manager.cpp + renderer_vulkan/vk_shader_state.h renderer_vulkan/vk_state.cpp renderer_vulkan/vk_state.h + renderer_vulkan/vk_surface_params.cpp + renderer_vulkan/vk_surface_params.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture.cpp diff --git a/src/video_core/renderer_vulkan/vk_buffer.cpp b/src/video_core/renderer_vulkan/vk_buffer.cpp index a15d4310d..8a791dc49 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer.cpp @@ -1,64 +1,59 @@ -#include "vk_buffer.h" -#include "vk_context.h" -#include +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
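For context, the common_types.h hunk above only defaults the move constructor; since the copy assignment operator stays user-deleted and no move assignment is declared, NonCopyable-derived types can be move-constructed but not reassigned. A minimal sketch of what that enables for the move-only RAII types introduced in this patch (the helper function below is hypothetical, not part of the patch):

    // Move-only: returning by value works through the defaulted move constructor.
    VKBuffer MakeStagingBuffer(u32 byte_count) {
        VKBuffer buffer;
        buffer.Create(byte_count,
                      vk::MemoryPropertyFlagBits::eHostVisible |
                          vk::MemoryPropertyFlagBits::eHostCoherent,
                      vk::BufferUsageFlagBits::eTransferSrc);
        return buffer; // OK: moved (or elided)
    }
    // VKBuffer copy = existing_buffer;  // error: copy constructor is deleted
    // existing_buffer = MakeStagingBuffer(64); // error: no move assignment either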
+ +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_buffer.h" +#include "video_core/renderer_vulkan/vk_instance.h" #include #include #include -Buffer::Buffer(std::shared_ptr context) : - context(context) +namespace Vulkan { + +VKBuffer::~VKBuffer() { + if (memory != nullptr) { + g_vk_instace->GetDevice().unmapMemory(buffer_memory.get()); + } } -Buffer::~Buffer() +void VKBuffer::Create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, vk::Format view_format) { - auto& device = context->device; - if (memory != nullptr) - device->unmapMemory(buffer_memory.get()); -} - -void Buffer::create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage) -{ - auto& device = context->device; + auto& device = g_vk_instace->GetDevice(); size = byte_count; vk::BufferCreateInfo bufferInfo({}, byte_count, usage); - buffer = device->createBufferUnique(bufferInfo); + buffer = device.createBufferUnique(bufferInfo); - auto mem_requirements = device->getBufferMemoryRequirements(buffer.get()); + auto mem_requirements = device.getBufferMemoryRequirements(buffer.get()); - auto memory_type_index = find_memory_type(mem_requirements.memoryTypeBits, properties, context); + auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, properties); vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index); - buffer_memory = device->allocateMemoryUnique(alloc_info); - device->bindBufferMemory(buffer.get(), buffer_memory.get(), 0); + buffer_memory = device.allocateMemoryUnique(alloc_info); + device.bindBufferMemory(buffer.get(), buffer_memory.get(), 0); // Optionally map the buffer to CPU memory if (properties & vk::MemoryPropertyFlagBits::eHostVisible) - memory = device->mapMemory(buffer_memory.get(), 0, byte_count); + memory = device.mapMemory(buffer_memory.get(), 0, byte_count); // Create buffer view for texel buffers if (usage & vk::BufferUsageFlagBits::eStorageTexelBuffer || usage & vk::BufferUsageFlagBits::eUniformTexelBuffer) { - vk::BufferViewCreateInfo view_info({}, buffer.get(), vk::Format::eR32Uint, 0, byte_count); - buffer_view = device->createBufferViewUnique(view_info); + vk::BufferViewCreateInfo view_info({}, buffer.get(), view_format, 0, byte_count); + buffer_view = device.createBufferViewUnique(view_info); } } -void Buffer::bind(vk::CommandBuffer& command_buffer) +void VKBuffer::CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region) { - vk::DeviceSize offsets[1] = { 0 }; - command_buffer.bindVertexBuffers(0, 1, &buffer.get(), offsets); -} + auto& device = g_vk_instace->GetDevice(); + auto& queue = g_vk_instace->graphics_queue; -void Buffer::copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region) -{ - auto& context = src_buffer.context; - auto& device = context->device; - auto& queue = context->graphics_queue; - - vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1); - vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0]; + vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1); + vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0]; command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); command_buffer.copyBuffer(src_buffer.buffer.get(), dst_buffer.buffer.get(), region); @@ -68,12 +63,12 @@ void Buffer::copy_buffer(Buffer& 
src_buffer, Buffer& dst_buffer, const vk::Buffe queue.submit(submit_info, nullptr); queue.waitIdle(); - device->freeCommandBuffers(context->command_pool.get(), command_buffer); + device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer); } -uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr context) +uint32_t VKBuffer::FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties) { - vk::PhysicalDeviceMemoryProperties mem_properties = context->physical_device.getMemoryProperties(); + vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties(); for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { @@ -82,32 +77,8 @@ uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags return i; } - throw std::runtime_error("[VK] Failed to find suitable memory type!"); + LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type."); + UNREACHABLE(); } -VertexBuffer::VertexBuffer(const std::shared_ptr& context) : - host(context), local(context), context(context) -{ -} - -void VertexBuffer::create(uint32_t vertex_count) -{ - // Create a host and local buffer - auto byte_count = sizeof(Vertex) * vertex_count; - local.create(byte_count, vk::MemoryPropertyFlagBits::eDeviceLocal, - vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eVertexBuffer); - host.create(byte_count, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, - vk::BufferUsageFlagBits::eTransferSrc); -} - -void VertexBuffer::copy_vertices(Vertex* vertices, uint32_t count) -{ - auto byte_count = count * sizeof(Vertex); - std::memcpy(host.memory, vertices, byte_count); - Buffer::copy_buffer(host, local, { 0, 0, byte_count }); -} - -void VertexBuffer::bind(vk::CommandBuffer& command_buffer) -{ - local.bind(command_buffer); } diff --git a/src/video_core/renderer_vulkan/vk_buffer.h b/src/video_core/renderer_vulkan/vk_buffer.h index cd82a251e..359da7a0e 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.h +++ b/src/video_core/renderer_vulkan/vk_buffer.h @@ -1,48 +1,28 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
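The removed VertexBuffer helper shows the intended usage pattern for the new VKBuffer API: pair a host-visible staging buffer with a device-local buffer, then let CopyBuffer submit a one-time command buffer. A minimal sketch replicating what the deleted helper did (variable names are hypothetical; assumes g_vk_instace has already been created):

    // Upload vertex data through a staging buffer.
    VKBuffer host, local;
    host.Create(byte_count,
                vk::MemoryPropertyFlagBits::eHostVisible |
                    vk::MemoryPropertyFlagBits::eHostCoherent,
                vk::BufferUsageFlagBits::eTransferSrc);
    local.Create(byte_count, vk::MemoryPropertyFlagBits::eDeviceLocal,
                 vk::BufferUsageFlagBits::eTransferDst |
                     vk::BufferUsageFlagBits::eVertexBuffer);

    std::memcpy(host.memory, vertices, byte_count); // host memory was mapped by Create
    VKBuffer::CopyBuffer(host, local, vk::BufferCopy{0, 0, byte_count});

Note that CopyBuffer blocks on queue.waitIdle(), so it is only suitable for initialization-time uploads; per-frame streaming would need the command buffer machinery from VKInstance instead.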
+ #pragma once -#include -#include + #include #include +#include #include "common/common_types.h" -class VkContext; +namespace Vulkan { -struct VertexInfo -{ - VertexInfo() = default; - VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) : - position(position), color(color), texcoords(coords) {}; - - glm::vec3 position; - glm::vec3 color; - glm::vec2 texcoords; -}; - -struct Vertex : public VertexInfo -{ - Vertex() = default; - Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {}; - static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo)); - static constexpr std::array attribute_desc = - { - vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)), - vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)), - }; -}; - -class Buffer : public NonCopyable, public Resource -{ - friend class VertexBuffer; +/// Generic Vulkan buffer object used by almost every resource +class VKBuffer final : public NonCopyable { public: - Buffer(std::shared_ptr context); - ~Buffer(); + VKBuffer() = default; + VKBuffer(VKBuffer&&) = default; + ~VKBuffer(); - void create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage); - void bind(vk::CommandBuffer& command_buffer); + /// Create a generic Vulkan buffer object + void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, vk::Format view_format = vk::Format::eUndefined); - static uint32_t find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr context); - static void copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region); + static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties); + static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region); public: void* memory = nullptr; @@ -50,22 +30,6 @@ public: vk::UniqueDeviceMemory buffer_memory; vk::UniqueBufferView buffer_view; uint32_t size = 0; - -protected: - std::shared_ptr context; }; -class VertexBuffer -{ -public: - VertexBuffer(const std::shared_ptr& context); - ~VertexBuffer() = default; - - void create(uint32_t vertex_count); - void copy_vertices(Vertex* vertices, uint32_t count); - void bind(vk::CommandBuffer& command_buffer); - -private: - Buffer host, local; - std::shared_ptr context; -}; +} diff --git a/src/video_core/renderer_vulkan/vk_context.h b/src/video_core/renderer_vulkan/vk_context.h deleted file mode 100644 index 1e4d576bd..000000000 --- a/src/video_core/renderer_vulkan/vk_context.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once -#include "vk_swapchain.h" -#include -#include - -class VkWindow; -class VkContext; - -constexpr int MAX_BINDING_COUNT = 10; - -struct PipelineLayoutInfo -{ - friend class VkContext; - PipelineLayoutInfo(const std::shared_ptr& context); - ~PipelineLayoutInfo(); - - void add_shader_module(std::string_view filepath, vk::ShaderStageFlagBits stage); - void add_resource(Resource* resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int binding, int group = 0); - -private: - using DescInfo = std::pair, std::vector>; - - std::shared_ptr context; - std::unordered_map resource_types; - std::unordered_map needed; - std::vector shader_stages; -}; - -class VkTexture; - -// The vulkan 
context. Can only be created by the window
-class VkContext
-{
- friend class VkWindow;
-public:
- VkContext(vk::UniqueInstance&& instance, VkWindow* window);
- ~VkContext();
-
- void create(SwapchainInfo& info);
- void create_graphics_pipeline(PipelineLayoutInfo& info);
-
- vk::CommandBuffer& get_command_buffer();
-
-private:
- void create_devices(int device_id = 0);
- void create_renderpass();
- void create_command_buffers();
- void create_decriptor_sets(PipelineLayoutInfo& info);
-
-public:
- // Queue family indexes
- uint32_t queue_family = -1;
-
- // Core vulkan objects
- vk::UniqueInstance instance;
- vk::PhysicalDevice physical_device;
- vk::UniqueDevice device;
- vk::Queue graphics_queue;
-
- // Pipeline
- vk::UniquePipelineLayout pipeline_layout;
- vk::UniquePipeline graphics_pipeline;
- vk::UniqueRenderPass renderpass;
- vk::UniqueDescriptorPool descriptor_pool;
- std::array, MAX_FRAMES_IN_FLIGHT> descriptor_layouts;
- std::array, MAX_FRAMES_IN_FLIGHT> descriptor_sets;
-
- // Command buffer
- vk::UniqueCommandPool command_pool;
- std::vector command_buffers;
-
- // Window
- VkWindow* window;
- SwapchainInfo swapchain_info;
-};
diff --git a/src/video_core/renderer_vulkan/vk_context.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp similarity index 100% rename from src/video_core/renderer_vulkan/vk_context.cpp rename to src/video_core/renderer_vulkan/vk_instance.cpp
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h new file mode 100644 index 000000000..70900f458 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -0,0 +1,72 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
+// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
+// number, but 9 swap textures at 60 FPS presentation allows for 800% speed, so that's probably fine.
+#ifdef ANDROID
+// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
+constexpr std::size_t SWAP_CHAIN_SIZE = 6;
+#else
+constexpr std::size_t SWAP_CHAIN_SIZE = 9;
+#endif
+
+/// The global Vulkan instance
+class VKInstance
+{
+public:
+ VKInstance() = default;
+ ~VKInstance();
+
+ /// Construct global Vulkan context
+ void Create(vk::UniqueInstance instance, vk::PhysicalDevice gpu, vk::UniqueSurfaceKHR surface,
+ bool enable_debug_reports, bool enable_validation_layer);
+
+ vk::Device& GetDevice() { return device.get(); }
+ vk::PhysicalDevice& GetPhysicalDevice() { return physical_device; }
+
+ /// Get a valid command buffer for the current frame
+ vk::CommandBuffer& GetCommandBuffer();
+
+ /// Feature support
+ bool SupportsAnisotropicFiltering() const;
+
+private:
+ void CreateDevices(int device_id = 0);
+ void CreateRenderpass();
+ void CreateCommandBuffers();
+
+public:
+ // Queue family indexes
+ u32 queue_family = -1;
+
+ // Core vulkan objects
+ vk::UniqueInstance instance;
+ vk::PhysicalDevice physical_device;
+ vk::UniqueDevice device;
+ vk::Queue graphics_queue;
+
+ // Pipeline
+ vk::UniqueDescriptorPool descriptor_pool;
+ std::array, SWAP_CHAIN_SIZE> descriptor_layouts;
+ std::array, SWAP_CHAIN_SIZE> descriptor_sets;
+
+ // Command buffer
+ vk::UniqueCommandPool command_pool;
+ std::vector command_buffers;
+};
+
+extern std::unique_ptr g_vk_instace;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.cpp b/src/video_core/renderer_vulkan/vk_pipeline.cpp new file mode 100644 index 000000000..6532b2ba1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.cpp @@ -0,0 +1,61 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include
+
+namespace Vulkan {
+
+shaderc::Compiler compiler;
+
+void VKPipeline::Info::AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage)
+{
+ shaderc_shader_kind shader_stage;
+ std::string name;
+ switch (stage)
+ {
+ case vk::ShaderStageFlagBits::eVertex:
+ shader_stage = shaderc_glsl_vertex_shader;
+ name = "Vertex shader";
+ break;
+ case vk::ShaderStageFlagBits::eCompute:
+ shader_stage = shaderc_glsl_compute_shader;
+ name = "Compute shader";
+ break;
+ case vk::ShaderStageFlagBits::eFragment:
+ shader_stage = shaderc_glsl_fragment_shader;
+ name = "Fragment shader";
+ break;
+ default:
+ LOG_CRITICAL(Render_Vulkan, "Unknown Vulkan shader stage {}", stage);
+ UNREACHABLE();
+ }
+
+ shaderc::CompileOptions options;
+ options.SetOptimizationLevel(shaderc_optimization_level_performance);
+ options.SetAutoBindUniforms(true);
+ options.SetAutoMapLocations(true);
+ options.SetTargetEnvironment(shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2);
+
+ auto result = compiler.CompileGlslToSpv(source.c_str(), shader_stage, name.c_str(), options);
+ if (result.GetCompilationStatus() != shaderc_compilation_status_success) {
+ LOG_CRITICAL(Render_Vulkan, "Failed to compile GLSL shader with error: {}", result.GetErrorMessage());
+ UNREACHABLE();
+ }
+
+ auto shader_code = std::vector{ result.cbegin(), result.cend() };
+
+ vk::ShaderModuleCreateInfo module_info({}, shader_code);
+ auto module = g_vk_instace->GetDevice().createShaderModuleUnique(module_info);
+ shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module.get(), "main");
+
+ // Keep the compiled module alive for as long as this pipeline info is used
+ shader_modules.push_back(std::move(module));
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..1e9bac825 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,49 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include "video_core/renderer_vulkan/vk_buffer.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+
+namespace Vulkan {
+
+using Resource = std::variant;
+
+/// Vulkan pipeline objects represent a collection of shader modules
+class VKPipeline final : private NonCopyable {
+public:
+ /// Includes all required information to build a Vulkan pipeline object
+ class Info : private NonCopyable {
+ public:
+ Info() = default;
+ ~Info() = default;
+
+ /// Compile a GLSL source string and assign it to a specific shader stage
+ void AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage);
+
+ /// Add a texture or a buffer to the target descriptor set
+ void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
+
+ private:
+ using ResourceInfo = std::pair, vk::DescriptorSetLayoutBinding>;
+
+ std::unordered_map> descriptor_sets;
+ std::vector shader_stages;
+ std::vector<vk::UniqueShaderModule> shader_modules;
+ };
+
+ VKPipeline() = default;
+ ~VKPipeline() = default;
+
+ /// Create a new Vulkan pipeline object
+ void Create(const Info& info);
+ void Create(vk::PipelineLayoutCreateInfo layout_info);
+
+private:
+ vk::UniquePipeline pipeline;
+ vk::UniquePipelineLayout pipeline_layout;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp b/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp new file mode 100644 index 000000000..a6bdd5e78 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp @@ -0,0 +1,668 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
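The Info class above is the declarative half of pipeline construction: shader stages are compiled and collected first, resources are assigned to descriptor set bindings, and VKPipeline::Create consumes the result. A rough usage sketch (the GLSL source strings and resource objects are hypothetical placeholders, and the exact alternatives of the Resource variant are elided in this dump, so the calls assume it can hold the buffer and texture types directly):

    VKPipeline::Info info;
    info.AddShaderModule(vertex_glsl, vk::ShaderStageFlagBits::eVertex);     // compiled via shaderc
    info.AddShaderModule(fragment_glsl, vk::ShaderStageFlagBits::eFragment);
    info.AddResource(uniform_buffer, vk::DescriptorType::eUniformBuffer,
                     vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment);
    info.AddResource(diffuse_texture, vk::DescriptorType::eCombinedImageSampler,
                     vk::ShaderStageFlagBits::eFragment);

    VKPipeline pipeline;
    pipeline.Create(info); // builds layout, descriptor sets and the pipeline object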
+ +#include "core/core.h" +#include "video_core/video_core.h" +#include "core/frontend/scope_acquire_context.h" +#include "video_core/renderer_vulkan/vk_pipeline_manager.h" +#include +#include +#include +#include +#include + +namespace Vulkan { + +static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) { + std::size_t hash = 0; + u64 regs_uid = Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32)); + boost::hash_combine(hash, regs_uid); + if (code.size() > 0) { + u64 code_uid = Common::ComputeHash64(code.data(), code.size() * sizeof(u32)); + boost::hash_combine(hash, code_uid); + } + return static_cast(hash); +} + +static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, + const std::set& supported_formats, + bool separable) { + + if (supported_formats.find(dump.binary_format) == supported_formats.end()) { + LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); + return {}; + } + + auto shader = OGLProgram(); + shader.handle = glCreateProgram(); + if (separable) { + glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), + static_cast(dump.binary.size())); + + GLint link_status{}; + glGetProgramiv(shader.handle, GL_LINK_STATUS, &link_status); + if (link_status == GL_FALSE) { + LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); + return {}; + } + + return shader; +} + +static std::set GetSupportedFormats() { + std::set supported_formats; + + GLint num_formats{}; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + for (const GLint format : formats) + supported_formats.insert(static_cast(format)); + return supported_formats; +} + +static std::tuple BuildVSConfigFromRaw( + const ShaderDiskCacheRaw& raw) { + Pica::Shader::ProgramCode program_code{}; + Pica::Shader::SwizzleData swizzle_data{}; + std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + program_code.begin()); + std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); + Pica::Shader::ShaderSetup setup; + setup.program_code = program_code; + setup.swizzle_data = swizzle_data; + return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup}; +} + +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + std::size_t expected_size) { + const GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index == GL_INVALID_INDEX) { + return; + } + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! 
Got {}, expected {}", + static_cast(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); +} + +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, + sizeof(UniformData)); + SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, sizeof(VSUniformData)); +} + +static void SetShaderSamplerBinding(GLuint shader, const char* name, + TextureUnits::TextureUnit binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, binding.id); + } +} + +static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, static_cast(binding)); + } +} + +static void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0)); + SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1)); + SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2)); + SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); + + // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); + + SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); + SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); + SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX); + SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY); + SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY); + SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ); + SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ); + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, + const Pica::Shader::ShaderSetup& setup) { + std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), + [](bool value) -> BoolAligned { return {value ? 
GL_TRUE : GL_FALSE}; });
+ std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i),
+ [](const auto& value) -> GLuvec4 {
+ return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()};
+ });
+ std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f),
+ [](const auto& value) -> GLvec4 {
+ return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(),
+ value.w.ToFloat32()};
+ });
+}
+
+template
+class ShaderCache {
+public:
+ explicit ShaderCache(bool separable) : separable(separable) {}
+ std::tuple> Get(
+ const KeyConfigType& config) {
+ auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable});
+ OGLShaderStage& cached_shader = iter->second;
+ std::optional result{};
+ if (new_shader) {
+ result = CodeGenerator(config, separable);
+ cached_shader.Create(result->code.c_str(), ShaderType);
+ }
+ return {cached_shader.GetHandle(), std::move(result)};
+ }
+
+ void Inject(const KeyConfigType& key, OGLProgram&& program) {
+ OGLShaderStage stage{separable};
+ stage.Inject(std::move(program));
+ shaders.emplace(key, std::move(stage));
+ }
+
+ void Inject(const KeyConfigType& key, OGLShaderStage&& stage) {
+ shaders.emplace(key, std::move(stage));
+ }
+
+private:
+ bool separable;
+ std::unordered_map shaders;
+};
+
+// This is a cache designed for shaders translated from PICA shaders. The first cache matches the
+// config structure like a normal cache does. On cache miss, the second cache matches the generated
+// GLSL code. The configuration is like this because there might be leftover code in the PICA shader
+// program buffer from the previous shader, which is hashed into the config, resulting in several
+// different config values from the same shader program.
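The scenario the comment above describes can be made concrete: two configs whose program buffers differ only in leftover dead code hash differently, yet generate byte-identical GLSL and should share one compiled stage. A self-contained toy illustration of the two-level lookup (not the real types; CompileToHandle stands in for OGLShaderStage::Create):

    int CompileToHandle(const std::string& glsl); // hypothetical stand-in

    std::unordered_map<std::string, int> code_cache; // generated GLSL -> compiled handle
    std::unordered_map<u64, int*> config_map;        // config hash -> cached entry

    int GetShader(u64 config_hash, const std::string& glsl) {
        if (auto it = config_map.find(config_hash); it != config_map.end())
            return *it->second;                       // first level: config seen before
        auto [it, is_new] = code_cache.try_emplace(glsl, 0);
        if (is_new)
            it->second = CompileToHandle(glsl);       // second level: compile only once
        config_map.emplace(config_hash, &it->second); // both configs now alias one shader
        return it->second;
    }

Pointers into an unordered_map stay valid across rehashes, which is why the real ShaderDoubleCache below can safely store OGLShaderStage* in shader_map.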
+template (*CodeGenerator)( + const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool), + GLenum ShaderType> +class ShaderDoubleCache { +public: + explicit ShaderDoubleCache(bool separable) : separable(separable) {} + std::tuple> Get( + const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) { + std::optional result{}; + auto map_it = shader_map.find(key); + if (map_it == shader_map.end()) { + auto program_opt = CodeGenerator(setup, key, separable); + if (!program_opt) { + shader_map[key] = nullptr; + return {0, std::nullopt}; + } + + std::string& program = program_opt->code; + auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + result.emplace(); + result->code = program; + cached_shader.Create(program.c_str(), ShaderType); + } + shader_map[key] = &cached_shader; + return {cached_shader.GetHandle(), std::move(result)}; + } + + if (map_it->second == nullptr) { + return {0, std::nullopt}; + } + + return {map_it->second->GetHandle(), std::nullopt}; + } + + void Inject(const KeyConfigType& key, std::string decomp, OGLProgram&& program) { + OGLShaderStage stage{separable}; + stage.Inject(std::move(program)); + const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first; + OGLShaderStage& cached_shader = iter->second; + shader_map.insert_or_assign(key, &cached_shader); + } + + void Inject(const KeyConfigType& key, std::string decomp, OGLShaderStage&& stage) { + const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first; + OGLShaderStage& cached_shader = iter->second; + shader_map.insert_or_assign(key, &cached_shader); + } + +private: + bool separable; + std::unordered_map shader_map; + std::unordered_map shader_cache; +}; + +using ProgrammableVertexShaders = + ShaderDoubleCache; + +using FixedGeometryShaders = + ShaderCache; + +using FragmentShaders = ShaderCache; + +class ShaderProgramManager::Impl { +public: + explicit Impl(bool separable, bool is_amd) + : is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable), + trivial_vertex_shader(separable), fixed_geometry_shaders(separable), + fragment_shaders(separable), disk_cache(separable) { + if (separable) + pipeline.Create(); + } + + struct ShaderTuple { + GLuint vs = 0; + GLuint gs = 0; + GLuint fs = 0; + + std::size_t vs_hash = 0; + std::size_t gs_hash = 0; + std::size_t fs_hash = 0; + + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + bool operator!=(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + std::size_t GetConfigHash() const { + std::size_t hash = 0; + boost::hash_combine(hash, vs_hash); + boost::hash_combine(hash, gs_hash); + boost::hash_combine(hash, fs_hash); + return hash; + } + }; + + bool is_amd; + bool separable; + + ShaderTuple current; + + ProgrammableVertexShaders programmable_vertex_shaders; + TrivialVertexShader trivial_vertex_shader; + + FixedGeometryShaders fixed_geometry_shaders; + + FragmentShaders fragment_shaders; + std::unordered_map program_cache; + OGLPipeline pipeline; + ShaderDiskCache disk_cache; +}; + +ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, + bool is_amd) + : impl(std::make_unique(separable, is_amd)), emu_window{emu_window_} {} + +ShaderProgramManager::~ShaderProgramManager() = default; + +bool ShaderProgramManager::UseProgrammableVertexShader(const 
Pica::Regs& regs,
+ Pica::Shader::ShaderSetup& setup) {
+ PicaVSConfig config{regs.vs, setup};
+ auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup);
+ if (handle == 0)
+ return false;
+ impl->current.vs = handle;
+ impl->current.vs_hash = config.Hash();
+
+ // Save VS to the disk cache if it's a new shader
+ if (result) {
+ auto& disk_cache = impl->disk_cache;
+ ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()};
+ program_code.insert(program_code.end(), setup.swizzle_data.begin(),
+ setup.swizzle_data.end());
+ const u64 unique_identifier = GetUniqueIdentifier(regs, program_code);
+ const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs,
+ std::move(program_code)};
+ disk_cache.SaveRaw(raw);
+ disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul);
+ }
+ return true;
+}
+
+void ShaderProgramManager::UseTrivialVertexShader() {
+ impl->current.vs = impl->trivial_vertex_shader.Get();
+ impl->current.vs_hash = 0;
+}
+
+void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
+ PicaFixedGSConfig gs_config(regs);
+ auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
+ impl->current.gs = handle;
+ impl->current.gs_hash = gs_config.Hash();
+}
+
+void ShaderProgramManager::UseTrivialGeometryShader() {
+ impl->current.gs = 0;
+ impl->current.gs_hash = 0;
+}
+
+void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
+ PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
+ auto [handle, result] = impl->fragment_shaders.Get(config);
+ impl->current.fs = handle;
+ impl->current.fs_hash = config.Hash();
+ // Save FS to the disk cache if it's a new shader
+ if (result) {
+ auto& disk_cache = impl->disk_cache;
+ u64 unique_identifier = GetUniqueIdentifier(regs, {});
+ ShaderDiskCacheRaw raw{unique_identifier, ProgramType::FS, regs, {}};
+ disk_cache.SaveRaw(raw);
+ disk_cache.SaveDecompiled(unique_identifier, *result, false);
+ }
+}
+
+void ShaderProgramManager::ApplyTo(OpenGLState& state) {
+ if (impl->separable) {
+ if (impl->is_amd) {
+ // Without this resetting, AMD sometimes freezes when one stage is changed but not
+ // for the others. On the other hand, including this reset seems to introduce a memory
+ // leak in Intel Graphics.
+ glUseProgramStages( + impl->pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0); + } + + glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs); + glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs); + glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = impl->pipeline.handle; + } else { + const u64 unique_identifier = impl->current.GetConfigHash(); + OGLProgram& cached_program = impl->program_cache[unique_identifier]; + if (cached_program.handle == 0) { + cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + auto& disk_cache = impl->disk_cache; + disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle, + VideoCore::g_hw_shader_accurate_mul); + + SetShaderUniformBlockBindings(cached_program.handle); + SetShaderSamplerBindings(cached_program.handle); + } + state.draw.shader_program = cached_program.handle; + } +} + +void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (!GLAD_GL_ARB_get_program_binary && !GLES) { + LOG_ERROR(Render_OpenGL, + "Cannot load disk cache as ARB_get_program_binary is not supported!"); + return; + } + + auto& disk_cache = impl->disk_cache; + const auto transferable = disk_cache.LoadTransferable(); + if (!transferable) { + return; + } + const auto& raws = *transferable; + + // Load uncompressed precompiled file for non-separable shaders. + // Precompiled file for separable shaders is compressed. + auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable); + + if (stop_loading) { + return; + } + + std::set supported_formats = GetSupportedFormats(); + + // Track if precompiled cache was altered during loading to know if we have to serialize the + // virtual precompiled cache file back to the hard drive + bool precompiled_cache_altered = false; + + std::mutex mutex; + std::atomic_bool compilation_failed = false; + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); + } + std::vector load_raws_index; + // Loads both decompiled and precompiled shaders from the cache. 
If either one is missing for + const auto LoadPrecompiledShader = [&](std::size_t begin, std::size_t end, + const std::vector& raw_cache, + const ShaderDecompiledMap& decompiled_map, + const ShaderDumpsMap& dump_map) { + for (std::size_t i = begin; i < end; ++i) { + if (stop_loading || compilation_failed) { + return; + } + const auto& raw{raw_cache[i]}; + const u64 unique_identifier{raw.GetUniqueIdentifier()}; + + const u64 calculated_hash = + GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode()); + if (unique_identifier != calculated_hash) { + LOG_ERROR(Render_OpenGL, + "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing " + "shader cache", + raw.GetUniqueIdentifier(), calculated_hash); + disk_cache.InvalidateAll(); + return; + } + + const auto dump{dump_map.find(unique_identifier)}; + const auto decomp{decompiled_map.find(unique_identifier)}; + + OGLProgram shader; + + if (dump != dump_map.end() && decomp != decompiled_map.end()) { + // Only load the vertex shader if its sanitize_mul setting matches + if (raw.GetProgramType() == ProgramType::VS && + decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader is dumped, attempt to load it + shader = + GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable); + if (shader.handle == 0) { + // If any shader failed, stop trying to compile, delete the cache, and start + // loading from raws + compilation_failed = true; + return; + } + // we have both the binary shader and the decompiled, so inject it into the + // cache + if (raw.GetProgramType() == ProgramType::VS) { + auto [conf, setup] = BuildVSConfigFromRaw(raw); + std::scoped_lock lock(mutex); + impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, + std::move(shader)); + } else if (raw.GetProgramType() == ProgramType::FS) { + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + std::scoped_lock lock(mutex); + impl->fragment_shaders.Inject(conf, std::move(shader)); + } else { + // Unsupported shader type got stored somehow so nuke the cache + + LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}", + raw.GetProgramType()); + compilation_failed = true; + return; + } + } else { + // Since precompiled didn't have the dump, we'll load them in the next phase + std::scoped_lock lock(mutex); + load_raws_index.push_back(i); + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size()); + } + } + }; + + const auto LoadPrecompiledProgram = [&](const ShaderDecompiledMap& decompiled_map, + const ShaderDumpsMap& dump_map) { + std::size_t i{0}; + for (const auto& dump : dump_map) { + if (stop_loading) { + break; + } + const u64 unique_identifier{dump.first}; + const auto decomp{decompiled_map.find(unique_identifier)}; + + // Only load the program if its sanitize_mul setting matches + if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) { + continue; + } + + // If the shader program is dumped, attempt to load it + OGLProgram shader = + GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable); + if (shader.handle != 0) { + SetShaderUniformBlockBindings(shader.handle); + SetShaderSamplerBindings(shader.handle); + impl->program_cache.emplace(unique_identifier, std::move(shader)); + } else { + LOG_ERROR(Frontend, "Failed to link Precompiled program!"); + compilation_failed = true; + break; + } + if (callback) { + callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size()); + } + } + 
};
+
+ if (impl->separable) {
+ LoadPrecompiledShader(0, raws.size(), raws, decompiled, dumps);
+ } else {
+ LoadPrecompiledProgram(decompiled, dumps);
+ }
+
+ bool load_all_raws = false;
+ if (compilation_failed) {
+ // Invalidate the precompiled cache if a dumped shader was rejected
+ impl->program_cache.clear();
+ disk_cache.InvalidatePrecompiled();
+ dumps.clear();
+ precompiled_cache_altered = true;
+ load_all_raws = true;
+ }
+ // TODO(SachinV): Skip loading raws until we implement a proper way to link non-separable
+ // shaders.
+ if (!impl->separable) {
+ return;
+ }
+
+ const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size();
+
+ if (callback) {
+ callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size);
+ }
+
+ compilation_failed = false;
+
+ std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
+ const auto LoadRawSeparable = [&](Frontend::GraphicsContext* context, std::size_t begin,
+ std::size_t end) {
+ Frontend::ScopeAcquireContext scope(*context);
+ for (std::size_t i = begin; i < end; ++i) {
+ if (stop_loading || compilation_failed) {
+ return;
+ }
+
+ const std::size_t raws_index = load_all_raws ? i : load_raws_index[i];
+ const auto& raw{raws[raws_index]};
+ const u64 unique_identifier{raw.GetUniqueIdentifier()};
+
+ bool sanitize_mul = false;
+ GLuint handle{0};
+ std::optional result;
+ // Otherwise decompile and build the shader at boot and save the result to the
+ // precompiled file
+ if (raw.GetProgramType() == ProgramType::VS) {
+ auto [conf, setup] = BuildVSConfigFromRaw(raw);
+ result = GenerateVertexShader(setup, conf, impl->separable);
+ OGLShaderStage stage{impl->separable};
+ stage.Create(result->code.c_str(), GL_VERTEX_SHADER);
+ handle = stage.GetHandle();
+ sanitize_mul = conf.state.sanitize_mul;
+ std::scoped_lock lock(mutex);
+ impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
+ } else if (raw.GetProgramType() == ProgramType::FS) {
+ PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
+ result = GenerateFragmentShader(conf, impl->separable);
+ OGLShaderStage stage{impl->separable};
+ stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);
+ handle = stage.GetHandle();
+ std::scoped_lock lock(mutex);
+ impl->fragment_shaders.Inject(conf, std::move(stage));
+ } else {
+ // Unsupported shader type got stored somehow so nuke the cache
+ LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType());
+ compilation_failed = true;
+ return;
+ }
+ if (handle == 0) {
+ LOG_ERROR(Frontend, "compilation from raw failed {:x} {:x}",
+ raw.GetProgramCode().at(0), raw.GetProgramCode().at(1));
+ compilation_failed = true;
+ return;
+ }
+
+ std::scoped_lock lock(mutex);
+ // If this is a new separable shader, add it to the precompiled cache
+ if (result) {
+ disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
+ disk_cache.SaveDump(unique_identifier, handle);
+ precompiled_cache_altered = true;
+ }
+
+ if (callback) {
+ callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size);
+ }
+ }
+ };
+
+ const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
+ const std::size_t bucket_size{load_raws_size / num_workers};
+ std::vector> contexts(num_workers);
+ std::vector threads(num_workers);
+ for (std::size_t i = 0; i < num_workers; ++i) {
+ const bool is_last_worker = i + 1 == num_workers;
+ const std::size_t start{bucket_size * i};
+ const std::size_t
end{is_last_worker ? load_raws_size : start + bucket_size};
+
+ // On some platforms the shared context has to be created from the GUI thread
+ contexts[i] = emu_window.CreateSharedContext();
+ threads[i] = std::thread(LoadRawSeparable, contexts[i].get(), start, end);
+ }
+ for (auto& thread : threads) {
+ thread.join();
+ }
+
+ if (compilation_failed) {
+ disk_cache.InvalidateAll();
+ }
+
+ if (precompiled_cache_altered) {
+ disk_cache.SaveVirtualPrecompiledFile();
+ }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_manager.h b/src/video_core/renderer_vulkan/vk_pipeline_manager.h new file mode 100644 index 000000000..8b00b2c94 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_manager.h @@ -0,0 +1,131 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "video_core/rasterizer_interface.h"
+#include "video_core/regs_lighting.h"
+#include "video_core/renderer_vulkan/pica_to_vulkan.h"
+#include "video_core/renderer_vulkan/vk_shader_state.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+enum class UniformBindings : GLuint { Common, VS, GS };
+
+struct LightSrc {
+ alignas(16) glm::vec3 specular_0;
+ alignas(16) glm::vec3 specular_1;
+ alignas(16) glm::vec3 diffuse;
+ alignas(16) glm::vec3 ambient;
+ alignas(16) glm::vec3 position;
+ alignas(16) glm::vec3 spot_direction; // negated
+ float dist_atten_bias;
+ float dist_atten_scale;
+};
+
+/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+// Not following that rule will cause problems on some AMD drivers.
+struct UniformData {
+ int framebuffer_scale;
+ int alphatest_ref;
+ float depth_scale;
+ float depth_offset;
+ float shadow_bias_constant;
+ float shadow_bias_linear;
+ int scissor_x1;
+ int scissor_y1;
+ int scissor_x2;
+ int scissor_y2;
+ int fog_lut_offset;
+ int proctex_noise_lut_offset;
+ int proctex_color_map_offset;
+ int proctex_alpha_map_offset;
+ int proctex_lut_offset;
+ int proctex_diff_lut_offset;
+ float proctex_bias;
+ int shadow_texture_bias;
+ alignas(16) glm::ivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
+ alignas(16) glm::vec3 fog_color;
+ alignas(8) glm::vec2 proctex_noise_f;
+ alignas(8) glm::vec2 proctex_noise_a;
+ alignas(8) glm::vec2 proctex_noise_p;
+ alignas(16) glm::vec3 lighting_global_ambient;
+ LightSrc light_src[8];
+ alignas(16) glm::vec4 const_color[6]; // A vec4 color for each of the six tev stages
+ alignas(16) glm::vec4 tev_combiner_buffer_color;
+ alignas(16) glm::vec4 clip_coef;
+};
+
+static_assert(sizeof(UniformData) == 0x4F0, "The size of the UniformData structure has changed, update the structure in the shader");
+static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the Vulkan spec");
+
+/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
+// NOTE: the same rule from UniformData also applies here.
+struct PicaUniformsData {
+ void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
+
+ struct BoolAligned {
+ alignas(16) int b;
+ };
+
+ std::array bools;
+ alignas(16) std::array i;
+ alignas(16) std::array f;
+};
+
+struct VSUniformData {
+ PicaUniformsData uniforms;
+};
+
+static_assert(sizeof(VSUniformData) == 1856, "The size of the VSUniformData structure has changed, update the structure in the shader");
+static_assert(sizeof(VSUniformData) < 16384, "VSUniformData structure must be less than 16kb as per the Vulkan spec");
+
+using Resource = std::variant;
+
+/// Includes all required information to build a Vulkan pipeline object
+class VKPipelineInfo : private NonCopyable {
+public:
+ VKPipelineInfo() = default;
+ ~VKPipelineInfo() = default;
+
+ /// Assign a shader module to a specific stage
+ void AddShaderModule(const vk::ShaderModule& module, vk::ShaderStageFlagBits stage);
+
+ /// Add a texture or a buffer to the target descriptor set
+ void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
+
+private:
+ using ResourceInfo = std::pair, vk::DescriptorSetLayoutBinding>;
+
+ std::unordered_map> descriptor_sets;
+ std::vector shader_stages;
+};
+
+/// A class that manages the storage and lifetime of Vulkan pipeline objects.
+class PipelineManager {
+public:
+ PipelineManager(Frontend::EmuWindow& emu_window);
+ ~PipelineManager();
+
+ /// Retrieves the Vulkan pipeline that maps to the current PICA state.
+ /// If not present, it is compiled and cached
+ vk::Pipeline GetPipeline(const Pica::Regs& config, Pica::Shader::ShaderSetup& setup);
+
+private:
+ std::unordered_map pipelines;
+ vk::UniquePipelineCache pipeline_cache;
+
+ Frontend::EmuWindow& emu_window;
+};
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..2f04412a4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,2252 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
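The sizeof() asserts above catch gross layout drift, but member offsets are what actually have to match the std140 block in the shader. A hedged sketch of complementary checks (offsetof on structs with glm members is conditionally-supported in standard C++ but widely accepted in practice; the expectations below follow from the alignas annotations above):

    #include <cstddef> // offsetof

    // alignas(16) already guarantees these, so the asserts are cheap documentation:
    static_assert(offsetof(UniformData, fog_color) % 16 == 0,
                  "vec3 members must sit on 16-byte boundaries under std140");
    static_assert(offsetof(UniformData, clip_coef) + sizeof(glm::vec4) == sizeof(UniformData),
                  "UniformData must end with a vec4, per the NOTE above");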
+ +#include +#include +#include +#include +#include +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "common/scope_exit.h" +#include "common/vector_math.h" +#include "core/hw/gpu.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/regs_texturing.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_vars.h" +#include "video_core/renderer_opengl/pica_to_gl.h" +#include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/video_core.h" + +namespace OpenGL { + +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + +MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0)); +MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); + +static bool IsVendorAmd() { + const std::string_view gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; + return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc."; +} +static bool IsVendorIntel() { + std::string gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; + return gpu_vendor == "Intel Inc."; +} + +RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window) + : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), + uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), + index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), + texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), + texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { + + allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && + GLAD_GL_ARB_framebuffer_no_attachments); + if (!allow_shadow) { + LOG_WARNING(Render_OpenGL, + "Shadow might not be able to render because of unsupported OpenGL extensions."); + } + + if (!GLAD_GL_ARB_copy_image && !GLES) { + LOG_WARNING(Render_OpenGL, + "ARB_copy_image not supported. 
Some games might produce artifacts."); + } + + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 + state.clip_distance[0] = true; + + // Create a 1x1 clear texture to use in the NULL case, + // instead of OpenGL's default of solid black + glGenTextures(1, &default_texture); + glBindTexture(GL_TEXTURE_2D, default_texture); + // For some reason alpha 0 wraps around to 1.0, so use 1/255 instead + u8 framebuffer_data[4] = {0, 0, 0, 1}; + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); + + // Create sampler objects + for (std::size_t i = 0; i < texture_samplers.size(); ++i) { + texture_samplers[i].Create(); + state.texture_units[i].sampler = texture_samplers[i].sampler.handle; + } + + // Create cubemap texture and sampler objects + texture_cube_sampler.Create(); + state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; + + // Generate VAO + sw_vao.Create(); + hw_vao.Create(); + + uniform_block_data.dirty = true; + + uniform_block_data.lighting_lut_dirty.fill(true); + uniform_block_data.lighting_lut_dirty_any = true; + + uniform_block_data.fog_lut_dirty = true; + + uniform_block_data.proctex_noise_lut_dirty = true; + uniform_block_data.proctex_color_map_dirty = true; + uniform_block_data.proctex_alpha_map_dirty = true; + uniform_block_data.proctex_lut_dirty = true; + uniform_block_data.proctex_diff_lut_dirty = true; + + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + uniform_size_aligned_vs = + Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = + Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); + + // Set vertex attributes for software shader path + state.draw.vertex_array = sw_vao.handle; + state.draw.vertex_buffer = vertex_buffer.GetHandle(); + state.Apply(); + + glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, position)); + glEnableVertexAttribArray(ATTRIBUTE_POSITION); + + glVertexAttribPointer(ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, color)); + glEnableVertexAttribArray(ATTRIBUTE_COLOR); + + glVertexAttribPointer(ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, tex_coord0)); + glVertexAttribPointer(ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, tex_coord1)); + glVertexAttribPointer(ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, tex_coord2)); + glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0); + glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD1); + glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD2); + + glVertexAttribPointer(ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); + glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0_W); + + glVertexAttribPointer(ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, normquat)); + glEnableVertexAttribArray(ATTRIBUTE_NORMQUAT); + + glVertexAttribPointer(ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), + (GLvoid*)offsetof(HardwareVertex, view)); + glEnableVertexAttribArray(ATTRIBUTE_VIEW); + + // Create render framebuffer + framebuffer.Create(); + + // Allocate and bind texture buffer lut textures + texture_buffer_lut_lf.Create(); + texture_buffer_lut_rg.Create(); + 
+
+    // Set vertex attributes for software shader path
+    state.draw.vertex_array = sw_vao.handle;
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, position));
+    glEnableVertexAttribArray(ATTRIBUTE_POSITION);
+
+    glVertexAttribPointer(ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, color));
+    glEnableVertexAttribArray(ATTRIBUTE_COLOR);
+
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord0));
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord1));
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord2));
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0);
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD1);
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD2);
+
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0_W);
+
+    glVertexAttribPointer(ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, normquat));
+    glEnableVertexAttribArray(ATTRIBUTE_NORMQUAT);
+
+    glVertexAttribPointer(ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, view));
+    glEnableVertexAttribArray(ATTRIBUTE_VIEW);
+
+    // Create render framebuffer
+    framebuffer.Create();
+
+    // Allocate and bind texture buffer lut textures
+    texture_buffer_lut_lf.Create();
+    texture_buffer_lut_rg.Create();
+    texture_buffer_lut_rgba.Create();
+    state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle;
+    state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle;
+    state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
+    state.Apply();
+    glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle());
+    glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
+    glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle());
+
+    // Bind index buffer for hardware shader path
+    state.draw.vertex_array = hw_vao.handle;
+    state.Apply();
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
+
+#ifdef __APPLE__
+    if (IsVendorIntel()) {
+        shader_program_manager = std::make_unique<ShaderProgramManager>(
+            emu_window,
+            VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false,
+            is_amd);
+    } else {
+        shader_program_manager = std::make_unique<ShaderProgramManager>(
+            emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd);
+    }
+#else
+    shader_program_manager = std::make_unique<ShaderProgramManager>(
+        emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd);
+#endif
+
+    glEnable(GL_BLEND);
+
+    SyncEntireState();
+}
+
+RasterizerOpenGL::~RasterizerOpenGL() = default;
+
+void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    shader_program_manager->LoadDiskCache(stop_loading, callback);
+}
+
+void RasterizerOpenGL::SyncEntireState() {
+    // Sync fixed function OpenGL state
+    SyncClipEnabled();
+    SyncCullMode();
+    SyncBlendEnabled();
+    SyncBlendFuncs();
+    SyncBlendColor();
+    SyncLogicOp();
+    SyncStencilTest();
+    SyncDepthTest();
+    SyncColorWriteMask();
+    SyncStencilWriteMask();
+    SyncDepthWriteMask();
+
+    // Sync uniforms
+    SyncClipCoef();
+    SyncDepthScale();
+    SyncDepthOffset();
+    SyncAlphaTest();
+    SyncCombinerColor();
+    auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages();
+    for (std::size_t index = 0; index < tev_stages.size(); ++index)
+        SyncTevConstColor(index, tev_stages[index]);
+
+    SyncGlobalAmbient();
+    for (unsigned light_index = 0; light_index < 8; light_index++) {
+        SyncLightSpecular0(light_index);
+        SyncLightSpecular1(light_index);
+        SyncLightDiffuse(light_index);
+        SyncLightAmbient(light_index);
+        SyncLightPosition(light_index);
+        SyncLightDistanceAttenuationBias(light_index);
+        SyncLightDistanceAttenuationScale(light_index);
+    }
+
+    SyncFogColor();
+    SyncProcTexNoise();
+    SyncProcTexBias();
+    SyncShadowBias();
+    SyncShadowTextureBias();
+}
+
+/**
+ * This is a helper function to resolve an issue when interpolating opposite quaternions. See below
+ * for a detailed description of this issue (yuriks):
+ *
+ * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
+ * interpolate two quaternions that are opposite, instead of going from one rotation to another
+ * using the shortest path, you'll go around the longest path. You can test if two quaternions are
+ * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
+ * making Dot(Q1, -Q2) positive.
+ *
+ * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
+ * correct for most cases but can still rotate around the long way sometimes. An implementation
+ * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
+ * between each step would work for those cases at the cost of being more complex to implement.
+ *
+ * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
+ * these issues, making this basic implementation actually more accurate to the hardware.
+ */
+static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa,
+                                   Common::Vec4<Pica::float24> qb) {
+    Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
+    Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
+
+    return (Common::Dot(a, b) < 0.f);
+}
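+// e.g. the identity rotation can be encoded as both (0, 0, 0, 1) and (0, 0, 0, -1);
+// Dot((0, 0, 0, 1), (0, 0, 0, -1)) = -1 < 0, so the pair is flagged as opposite and one
+// quaternion gets its sign flipped instead of being interpolated through zero.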
+
+void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
+                                   const Pica::Shader::OutputVertex& v1,
+                                   const Pica::Shader::OutputVertex& v2) {
+    vertex_batch.emplace_back(v0, false);
+    vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
+    vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
+}
+
+static constexpr std::array<GLenum, 4> vs_attrib_types{
+    GL_BYTE,          // VertexAttributeFormat::BYTE
+    GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
+    GL_SHORT,         // VertexAttributeFormat::SHORT
+    GL_FLOAT          // VertexAttributeFormat::FLOAT
+};
+
+struct VertexArrayInfo {
+    u32 vs_input_index_min;
+    u32 vs_input_index_max;
+    u32 vs_input_size;
+};
+
+RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
+
+    u32 vertex_min;
+    u32 vertex_max;
+    if (is_indexed) {
+        const auto& index_info = regs.pipeline.index_array;
+        const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
+        const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
+        const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+        const bool index_u16 = index_info.format != 0;
+
+        vertex_min = 0xFFFF;
+        vertex_max = 0;
+        const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
+        res_cache.FlushRegion(address, size, nullptr);
+        for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
+            const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
+            vertex_min = std::min(vertex_min, vertex);
+            vertex_max = std::max(vertex_max, vertex);
+        }
+    } else {
+        vertex_min = regs.pipeline.vertex_offset;
+        vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
+    }
+
+    const u32 vertex_num = vertex_max - vertex_min + 1;
+    u32 vs_input_size = 0;
+    for (const auto& loader : vertex_attributes.attribute_loaders) {
+        if (loader.component_count != 0) {
+            vs_input_size += loader.byte_count * vertex_num;
+        }
+    }
+
+    return {vertex_min, vertex_max, vs_input_size};
+}
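+// e.g. an indexed draw whose index stream is {7, 5, 6, 7, 6, 8} yields vertex_min = 5 and
+// vertex_max = 8, so only the 4 vertices in [5, 8] are copied into the stream buffer; the
+// indices are later rebased against vertex_min via the draw call's negative base vertex.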
+
+void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
+                                        GLuint vs_input_index_min, GLuint vs_input_index_max) {
+    MICROPROFILE_SCOPE(OpenGL_VAO);
+    const auto& regs = Pica::g_state.regs;
+    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
+    PAddr base_address = vertex_attributes.GetPhysicalBaseAddress();
+
+    state.draw.vertex_array = hw_vao.handle;
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    std::array<bool, 16> enable_attributes{};
+
+    for (const auto& loader : vertex_attributes.attribute_loaders) {
+        if (loader.component_count == 0 || loader.byte_count == 0) {
+            continue;
+        }
+
+        u32 offset = 0;
+        for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) {
+            u32 attribute_index = loader.GetComponent(comp);
+            if (attribute_index < 12) {
+                if (vertex_attributes.GetNumElements(attribute_index) != 0) {
+                    offset = Common::AlignUp(
+                        offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
+
+                    u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
+                    GLint size = vertex_attributes.GetNumElements(attribute_index);
+                    GLenum type = vs_attrib_types[static_cast<u32>(
+                        vertex_attributes.GetFormat(attribute_index))];
+                    GLsizei stride = loader.byte_count;
+                    glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride,
+                                          reinterpret_cast<GLvoid*>(buffer_offset + offset));
+                    enable_attributes[input_reg] = true;
+
+                    offset += vertex_attributes.GetStride(attribute_index);
+                }
+            } else {
+                // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
+                // respectively
+                offset = Common::AlignUp(offset, 4);
+                offset += (attribute_index - 11) * 4;
+            }
+        }
+
+        PAddr data_addr =
+            base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
+
+        u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
+        u32 data_size = loader.byte_count * vertex_num;
+
+        res_cache.FlushRegion(data_addr, data_size, nullptr);
+        std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
+
+        array_ptr += data_size;
+        buffer_offset += data_size;
+    }
+
+    for (std::size_t i = 0; i < enable_attributes.size(); ++i) {
+        if (enable_attributes[i] != hw_vao_enabled_attributes[i]) {
+            if (enable_attributes[i]) {
+                glEnableVertexAttribArray(static_cast<GLuint>(i));
+            } else {
+                glDisableVertexAttribArray(static_cast<GLuint>(i));
+            }
+            hw_vao_enabled_attributes[i] = enable_attributes[i];
+        }
+
+        if (vertex_attributes.IsDefaultAttribute(i)) {
+            const u32 reg = regs.vs.GetRegisterForAttribute(i);
+            if (!enable_attributes[reg]) {
+                const auto& attr = Pica::g_state.input_default_attributes.attr[i];
+                glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
+                                 attr.w.ToFloat32());
+            }
+        }
+    }
+}
+
+bool RasterizerOpenGL::SetupVertexShader() {
+    MICROPROFILE_SCOPE(OpenGL_VS);
+    return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
+                                                               Pica::g_state.vs);
+}
+
+bool RasterizerOpenGL::SetupGeometryShader() {
+    MICROPROFILE_SCOPE(OpenGL_GS);
+    const auto& regs = Pica::g_state.regs;
+
+    if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
+        LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
+        return false;
+    }
+
+    shader_program_manager->UseFixedGeometryShader(regs);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
+        if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) {
+            return false;
+        }
+        if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) {
+            return false;
+        }
+    }
+
+    if (!SetupVertexShader())
+        return false;
+
+    if (!SetupGeometryShader())
+        return false;
+
+    return Draw(true, is_indexed);
+}
+
+static GLenum GetCurrentPrimitiveMode() {
+    const auto& regs = Pica::g_state.regs;
+    switch (regs.pipeline.triangle_topology) {
+    case Pica::PipelineRegs::TriangleTopology::Shader:
+    case Pica::PipelineRegs::TriangleTopology::List:
+        return GL_TRIANGLES;
+    case Pica::PipelineRegs::TriangleTopology::Fan:
+        return GL_TRIANGLE_FAN;
+    case Pica::PipelineRegs::TriangleTopology::Strip:
+        return GL_TRIANGLE_STRIP;
+    default:
+        UNREACHABLE();
+    }
+}
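+// Note on the indexed path below: since only vertices [vs_input_index_min, vs_input_index_max]
+// are uploaded, guest indices are rebased with a base vertex of -vs_input_index_min, so an index
+// equal to vs_input_index_min addresses slot 0 of the freshly mapped vertex range.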
+
+bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    GLenum primitive_mode = GetCurrentPrimitiveMode();
+
+    auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
+
+    if (vs_input_size > VERTEX_BUFFER_SIZE) {
+        LOG_WARNING(Render_OpenGL, "Too large vertex input size {}", vs_input_size);
+        return false;
+    }
+
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    u8* buffer_ptr;
+    GLintptr buffer_offset;
+    std::tie(buffer_ptr, buffer_offset, std::ignore) = vertex_buffer.Map(vs_input_size, 4);
+    SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
+    vertex_buffer.Unmap(vs_input_size);
+
+    shader_program_manager->ApplyTo(state);
+    state.Apply();
+
+    if (is_indexed) {
+        bool index_u16 = regs.pipeline.index_array.format != 0;
+        std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
+
+        if (index_buffer_size > INDEX_BUFFER_SIZE) {
+            LOG_WARNING(Render_OpenGL, "Too large index input size {}", index_buffer_size);
+            return false;
+        }
+
+        const u8* index_data = VideoCore::g_memory->GetPhysicalPointer(
+            regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
+            regs.pipeline.index_array.offset);
+        std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4);
+        std::memcpy(buffer_ptr, index_data, index_buffer_size);
+        index_buffer.Unmap(index_buffer_size);
+
+        glDrawRangeElementsBaseVertex(
+            primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices,
+            index_u16 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
+            reinterpret_cast<const void*>(buffer_offset), -static_cast<GLint>(vs_input_index_min));
+    } else {
+        glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices);
+    }
+    return true;
+}
+
+void RasterizerOpenGL::DrawTriangles() {
+    if (vertex_batch.empty())
+        return;
+    Draw(false, false);
+}
+
+bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
+    const auto& regs = Pica::g_state.regs;
+
+    bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
+                            Pica::FramebufferRegs::FragmentOperationMode::Shadow;
+
+    const bool has_stencil =
+        regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
+
+    const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE ||
+                                state.color_mask.green_enabled == GL_TRUE ||
+                                state.color_mask.blue_enabled == GL_TRUE ||
+                                state.color_mask.alpha_enabled == GL_TRUE;
+
+    const bool write_depth_fb =
+        (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
+        (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0);
+
+    const bool using_color_fb =
+        regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
+    const bool using_depth_fb =
+        !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
+        (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
+         (has_stencil && state.stencil.test_enabled));
+
+    Common::Rectangle<s32> viewport_rect_unscaled{
+        // These registers hold half-width and half-height, so must be multiplied by 2
+        regs.rasterizer.viewport_corner.x, // left
+        regs.rasterizer.viewport_corner.y + // top
+            static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() *
+                             2),
+        regs.rasterizer.viewport_corner.x + // right
+            static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() *
+                             2),
+        regs.rasterizer.viewport_corner.y // bottom
+    };
+
+    Surface color_surface;
+    Surface depth_surface;
+    Common::Rectangle<u32> surfaces_rect;
+    std::tie(color_surface, depth_surface, surfaces_rect) =
+        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
+
+    const u16 res_scale = color_surface != nullptr
+                              ? color_surface->res_scale
+                              : (depth_surface == nullptr ? 1u : depth_surface->res_scale);
+
+    Common::Rectangle<u32> draw_rect{
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+                                             viewport_rect_unscaled.left * res_scale,
+                                         surfaces_rect.left, surfaces_rect.right)), // Left
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+                                             viewport_rect_unscaled.top * res_scale,
+                                         surfaces_rect.bottom, surfaces_rect.top)), // Top
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+                                             viewport_rect_unscaled.right * res_scale,
+                                         surfaces_rect.left, surfaces_rect.right)), // Right
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+                                             viewport_rect_unscaled.bottom * res_scale,
+                                         surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+
+    // Bind the framebuffer surfaces
+    state.draw.draw_framebuffer = framebuffer.handle;
+    state.Apply();
+
+    if (shadow_rendering) {
+        if (!allow_shadow || color_surface == nullptr) {
+            return true;
+        }
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH,
+                                color_surface->width * color_surface->res_scale);
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT,
+                                color_surface->height * color_surface->res_scale);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+        state.image_shadow_buffer = color_surface->texture.handle;
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                               color_surface != nullptr ? color_surface->texture.handle : 0, 0);
+        if (depth_surface != nullptr) {
+            if (has_stencil) {
+                // attach both depth and stencil
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                       GL_TEXTURE_2D, depth_surface->texture.handle, 0);
+            } else {
+                // attach depth
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                       depth_surface->texture.handle, 0);
+                // clear stencil attachment
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                                       0);
+            }
+        } else {
+            // clear both depth and stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        }
+    }
+
+    // Sync the viewport
+    state.viewport.x =
+        static_cast<GLint>(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale;
+    state.viewport.y =
+        static_cast<GLint>(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale;
+    state.viewport.width = static_cast<GLsizei>(viewport_rect_unscaled.GetWidth() * res_scale);
+    state.viewport.height = static_cast<GLsizei>(viewport_rect_unscaled.GetHeight() * res_scale);
+
+    if (uniform_block_data.data.framebuffer_scale != res_scale) {
+        uniform_block_data.data.framebuffer_scale = res_scale;
+        uniform_block_data.dirty = true;
+    }
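+    // e.g. at res_scale = 2 a 400x240 guest viewport becomes an 800x480 host viewport, and the
+    // guest-space rectangles below (scissor, draw_rect) are scaled by the same factor before
+    // being clamped to the cached surface sub-rect.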
+
+    // Scissor checks are window-, not viewport-relative, which means that if the cached texture
+    // sub-rect changes, the scissor bounds also need to be updated.
+    GLint scissor_x1 =
+        static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
+    GLint scissor_y1 =
+        static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
+    // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
+    // scaling or doing multisampling.
+    GLint scissor_x2 =
+        static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
+    GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
+                                          (regs.rasterizer.scissor_test.y2 + 1) * res_scale);
+
+    if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
+        uniform_block_data.data.scissor_x2 != scissor_x2 ||
+        uniform_block_data.data.scissor_y1 != scissor_y1 ||
+        uniform_block_data.data.scissor_y2 != scissor_y2) {
+
+        uniform_block_data.data.scissor_x1 = scissor_x1;
+        uniform_block_data.data.scissor_x2 = scissor_x2;
+        uniform_block_data.data.scissor_y1 = scissor_y1;
+        uniform_block_data.data.scissor_y2 = scissor_y2;
+        uniform_block_data.dirty = true;
+    }
+
+    bool need_duplicate_texture = false;
+    auto CheckBarrier = [&need_duplicate_texture, &color_surface](GLuint handle) {
+        if (color_surface && color_surface->texture.handle == handle) {
+            need_duplicate_texture = true;
+        }
+    };
+
+    // Sync and bind the texture surfaces
+    const auto pica_textures = regs.texturing.GetTextures();
+    for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+        const auto& texture = pica_textures[texture_index];
+
+        if (texture.enabled) {
+            if (texture_index == 0) {
+                using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
+                switch (texture.config.type.Value()) {
+                case TextureType::Shadow2D: {
+                    if (!allow_shadow)
+                        continue;
+
+                    Surface surface = res_cache.GetTextureSurface(texture);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_px = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+                    continue;
+                }
+                case TextureType::ShadowCube: {
+                    if (!allow_shadow)
+                        continue;
+                    Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(
+                        texture.config, texture.format);
+                    Surface surface;
+
+                    using CubeFace = Pica::TexturingRegs::CubeFace;
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_px = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_nx = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_nx = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_py = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_py = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_ny = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_ny = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_pz = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_pz = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_nz = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_nz = 0;
+                    }
+
+                    continue;
+                }
+                case TextureType::TextureCube:
+                    using CubeFace = Pica::TexturingRegs::CubeFace;
+                    TextureCubeConfig config;
+                    config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX);
+                    config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX);
+                    config.py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY);
+                    config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY);
+                    config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ);
+                    config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ);
+                    config.width = texture.config.width;
+                    config.format = texture.format;
+                    state.texture_cube_unit.texture_cube =
+                        res_cache.GetTextureCube(config).texture.handle;
+
+                    texture_cube_sampler.SyncWithConfig(texture.config);
+                    state.texture_units[texture_index].texture_2d = 0;
+                    continue; // Texture unit 0 setup finished. Continue to next unit
+                }
+                state.texture_cube_unit.texture_cube = 0;
+            }
+
+            texture_samplers[texture_index].SyncWithConfig(texture.config);
+            Surface surface = res_cache.GetTextureSurface(texture);
+            if (surface != nullptr) {
+                CheckBarrier(state.texture_units[texture_index].texture_2d =
+                                 surface->texture.handle);
+            } else {
+                // Can occur when texture addr is null or its memory is unmapped/invalid
+                // HACK: In this case, the correct behaviour for the PICA is to use the last
+                // rendered colour. But because this would be impractical to implement, the
+                // next best alternative is to use a clear texture, essentially skipping
+                // the geometry in question.
+                // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
+                // on the male character's face, which in the OpenGL default appear black.
+                state.texture_units[texture_index].texture_2d = default_texture;
+            }
+        } else {
+            state.texture_units[texture_index].texture_2d = 0;
+        }
+    }
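+    // Sampling a texture that is simultaneously attached to the draw framebuffer is a
+    // "feedback loop" with undefined results under OpenGL, hence the duplication below.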
+
+    OGLTexture temp_tex;
+    if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) {
+        // The game is trying to use a surface as a texture and framebuffer at the same time
+        // which causes unpredictable behavior on the host.
+        // Making a copy to sample from eliminates this issue and seems to be fairly cheap.
+        temp_tex.Create();
+        glBindTexture(GL_TEXTURE_2D, temp_tex.handle);
+        auto [internal_format, format, type] = GetFormatTuple(color_surface->pixel_format);
+        OGLTexture::Allocate(GL_TEXTURE_2D, color_surface->max_level + 1, internal_format, format,
+                             type, color_surface->GetScaledWidth(),
+                             color_surface->GetScaledHeight());
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        glBindTexture(GL_TEXTURE_2D, state.texture_units[0].texture_2d);
+
+        for (std::size_t level{0}; level <= color_surface->max_level; ++level) {
+            glCopyImageSubData(color_surface->texture.handle, GL_TEXTURE_2D, level, 0, 0, 0,
+                               temp_tex.handle, GL_TEXTURE_2D, level, 0, 0, 0,
+                               color_surface->GetScaledWidth() >> level,
+                               color_surface->GetScaledHeight() >> level, 1);
+        }
+
+        for (auto& unit : state.texture_units) {
+            if (unit.texture_2d == color_surface->texture.handle) {
+                unit.texture_2d = temp_tex.handle;
+            }
+        }
+        for (auto shadow_unit : {&state.image_shadow_texture_nx, &state.image_shadow_texture_ny,
+                                 &state.image_shadow_texture_nz, &state.image_shadow_texture_px,
+                                 &state.image_shadow_texture_py, &state.image_shadow_texture_pz}) {
+            if (*shadow_unit == color_surface->texture.handle) {
+                *shadow_unit = temp_tex.handle;
+            }
+        }
+    }
+
+    // Sync and bind the shader
+    if (shader_dirty) {
+        SetShader();
+        shader_dirty = false;
+    }
+
+    // Sync the LUTs within the texture buffer
+    SyncAndUploadLUTs();
+    SyncAndUploadLUTsLF();
+
+    // Sync the uniform data
+    UploadUniforms(accelerate);
+
+    // Viewport can have negative offsets or larger
+    // dimensions than our framebuffer sub-rect.
+    // Enable scissor test to prevent drawing
+    // outside of the framebuffer region
+    state.scissor.enabled = true;
+    state.scissor.x = draw_rect.left;
+    state.scissor.y = draw_rect.bottom;
+    state.scissor.width = draw_rect.GetWidth();
+    state.scissor.height = draw_rect.GetHeight();
+    state.Apply();
+
+    // Draw the vertex batch
+    bool succeeded = true;
+    if (accelerate) {
+        succeeded = AccelerateDrawBatchInternal(is_indexed);
+    } else {
+        state.draw.vertex_array = sw_vao.handle;
+        state.draw.vertex_buffer = vertex_buffer.GetHandle();
+        shader_program_manager->UseTrivialVertexShader();
+        shader_program_manager->UseTrivialGeometryShader();
+        shader_program_manager->ApplyTo(state);
+        state.Apply();
+
+        // Rounded down to triple-vertex granularity so each chunk holds whole triangles only
+        std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
+        for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size();
+             base_vertex += max_vertices) {
+            const std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex);
+            const std::size_t vertex_size = vertices * sizeof(HardwareVertex);
+            u8* vbo;
+            GLintptr offset;
+            std::tie(vbo, offset, std::ignore) =
+                vertex_buffer.Map(vertex_size, sizeof(HardwareVertex));
+            std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size);
+            vertex_buffer.Unmap(vertex_size);
+            glDrawArrays(GL_TRIANGLES, static_cast<GLint>(offset / sizeof(HardwareVertex)),
+                         static_cast<GLsizei>(vertices));
+        }
+    }
+
+    vertex_batch.clear();
+
+    // Reset textures in rasterizer state context because the rasterizer cache might delete them
+    for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+        state.texture_units[texture_index].texture_2d = 0;
+    }
+    state.texture_cube_unit.texture_cube = 0;
+    if (allow_shadow) {
+        state.image_shadow_texture_px = 0;
+        state.image_shadow_texture_nx = 0;
+        state.image_shadow_texture_py = 0;
+        state.image_shadow_texture_ny = 0;
+        state.image_shadow_texture_pz = 0;
+        state.image_shadow_texture_nz = 0;
+        state.image_shadow_buffer = 0;
+    }
+    state.Apply();
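+    // Shadow rendering writes through image load/store rather than the fixed-function blend
+    // path, so those writes must be made visible before a later pass samples or re-attaches
+    // the surface.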
+
+    if (shadow_rendering) {
+        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+                        GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+    }
+
+    // Mark framebuffer surfaces as dirty
+    Common::Rectangle<u32> draw_rect_unscaled{draw_rect.left / res_scale, draw_rect.top / res_scale,
+                                              draw_rect.right / res_scale,
+                                              draw_rect.bottom / res_scale};
+
+    if (color_surface != nullptr && write_color_fb) {
+        auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
+        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+                                   color_surface);
+    }
+    if (depth_surface != nullptr && write_depth_fb) {
+        auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
+        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+                                   depth_surface);
+    }
+
+    return succeeded;
+}
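+// Register writes are funneled through here so only the state touched by a write is re-synced;
+// e.g. a write to rasterizer.cull_mode re-evaluates just the GL cull state instead of running
+// the full SyncEntireState() pass used at startup.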
+
+void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
+    const auto& regs = Pica::g_state.regs;
+
+    switch (id) {
+    // Culling
+    case PICA_REG_INDEX(rasterizer.cull_mode):
+        SyncCullMode();
+        break;
+
+    // Clipping plane
+    case PICA_REG_INDEX(rasterizer.clip_enable):
+        SyncClipEnabled();
+        break;
+
+    case PICA_REG_INDEX(rasterizer.clip_coef[0]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[1]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[2]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[3]):
+        SyncClipCoef();
+        break;
+
+    // Depth modifiers
+    case PICA_REG_INDEX(rasterizer.viewport_depth_range):
+        SyncDepthScale();
+        break;
+    case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane):
+        SyncDepthOffset();
+        break;
+
+    // Depth buffering
+    case PICA_REG_INDEX(rasterizer.depthmap_enable):
+        shader_dirty = true;
+        break;
+
+    // Blending
+    case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
+        SyncBlendEnabled();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
+        SyncBlendFuncs();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.blend_const):
+        SyncBlendColor();
+        break;
+
+    // Shadow texture
+    case PICA_REG_INDEX(texturing.shadow):
+        SyncShadowTextureBias();
+        break;
+
+    // Fog state
+    case PICA_REG_INDEX(texturing.fog_color):
+        SyncFogColor();
+        break;
+    case PICA_REG_INDEX(texturing.fog_lut_data[0]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[1]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[2]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[3]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[4]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[5]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[6]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[7]):
+        uniform_block_data.fog_lut_dirty = true;
+        break;
+
+    // ProcTex state
+    case PICA_REG_INDEX(texturing.proctex):
+    case PICA_REG_INDEX(texturing.proctex_lut):
+    case PICA_REG_INDEX(texturing.proctex_lut_offset):
+        SyncProcTexBias();
+        shader_dirty = true;
+        break;
+
+    case PICA_REG_INDEX(texturing.proctex_noise_u):
+    case PICA_REG_INDEX(texturing.proctex_noise_v):
+    case PICA_REG_INDEX(texturing.proctex_noise_frequency):
+        SyncProcTexNoise();
+        break;
+
+    case PICA_REG_INDEX(texturing.proctex_lut_data[0]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[1]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[2]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[3]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[4]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[5]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[6]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[7]):
+        using Pica::TexturingRegs;
+        switch (regs.texturing.proctex_lut_config.ref_table.Value()) {
+        case TexturingRegs::ProcTexLutTable::Noise:
+            uniform_block_data.proctex_noise_lut_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::ColorMap:
+            uniform_block_data.proctex_color_map_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::AlphaMap:
+            uniform_block_data.proctex_alpha_map_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::Color:
+            uniform_block_data.proctex_lut_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::ColorDiff:
+            uniform_block_data.proctex_diff_lut_dirty = true;
+            break;
+        }
+        break;
+
+    // Alpha test
+    case PICA_REG_INDEX(framebuffer.output_merger.alpha_test):
+        SyncAlphaTest();
+        shader_dirty = true;
+        break;
+
+    // Sync GL stencil test + stencil write mask
+    // (Pica stencil test function register also contains a stencil write mask)
+    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func):
+        SyncStencilTest();
+        SyncStencilWriteMask();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op):
+    case PICA_REG_INDEX(framebuffer.framebuffer.depth_format):
+        SyncStencilTest();
+        break;
+
+    // Sync GL depth test + depth and color write mask
+    // (Pica depth test function register also contains a depth and color write mask)
+    case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable):
+        SyncDepthTest();
+        SyncDepthWriteMask();
+        SyncColorWriteMask();
+        break;
+
+    // Sync GL depth and stencil write mask
+    // (This is a dedicated combined depth / stencil write-enable register)
+    case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write):
+        SyncDepthWriteMask();
+        SyncStencilWriteMask();
+        break;
+
+    // Sync GL color write mask
+    // (This is a dedicated color write-enable register)
+    case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write):
+        SyncColorWriteMask();
+        break;
+
+    case PICA_REG_INDEX(framebuffer.shadow):
+        SyncShadowBias();
+        break;
+
+    // Scissor test
+    case PICA_REG_INDEX(rasterizer.scissor_test.mode):
+        shader_dirty = true;
+        break;
+
+    // Logic op
+    case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
+        SyncLogicOp();
+        break;
+
+    case PICA_REG_INDEX(texturing.main_config):
+        shader_dirty = true;
+        break;
+
+    // Texture 0 type
+    case PICA_REG_INDEX(texturing.texture0.type):
+        shader_dirty = true;
+        break;
+
+    // TEV stages
+    // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input)
+    case PICA_REG_INDEX(texturing.tev_stage0.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_scale):
+    case PICA_REG_INDEX(texturing.tev_combiner_buffer_input):
+        shader_dirty = true;
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage0.const_r):
+        SyncTevConstColor(0, regs.texturing.tev_stage0);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage1.const_r):
+        SyncTevConstColor(1, regs.texturing.tev_stage1);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage2.const_r):
+        SyncTevConstColor(2, regs.texturing.tev_stage2);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage3.const_r):
+        SyncTevConstColor(3, regs.texturing.tev_stage3);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage4.const_r):
+        SyncTevConstColor(4, regs.texturing.tev_stage4);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage5.const_r):
+        SyncTevConstColor(5, regs.texturing.tev_stage5);
+        break;
+
+    // TEV combiner buffer color
+    case PICA_REG_INDEX(texturing.tev_combiner_buffer_color):
+        SyncCombinerColor();
+        break;
+
+    // Fragment lighting switches
+    case PICA_REG_INDEX(lighting.disable):
+    case PICA_REG_INDEX(lighting.max_light_index):
+    case PICA_REG_INDEX(lighting.config0):
+    case PICA_REG_INDEX(lighting.config1):
+    case PICA_REG_INDEX(lighting.abs_lut_input):
+    case PICA_REG_INDEX(lighting.lut_input):
+    case PICA_REG_INDEX(lighting.lut_scale):
+    case PICA_REG_INDEX(lighting.light_enable):
+        break;
+
+    // Fragment lighting specular 0 color
+    case PICA_REG_INDEX(lighting.light[0].specular_0):
+        SyncLightSpecular0(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].specular_0):
+        SyncLightSpecular0(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].specular_0):
+        SyncLightSpecular0(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].specular_0):
+        SyncLightSpecular0(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].specular_0):
+        SyncLightSpecular0(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].specular_0):
+        SyncLightSpecular0(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].specular_0):
+        SyncLightSpecular0(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].specular_0):
+        SyncLightSpecular0(7);
+        break;
+
+    // Fragment lighting specular 1 color
+    case PICA_REG_INDEX(lighting.light[0].specular_1):
+        SyncLightSpecular1(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].specular_1):
+        SyncLightSpecular1(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].specular_1):
+        SyncLightSpecular1(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].specular_1):
+        SyncLightSpecular1(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].specular_1):
+        SyncLightSpecular1(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].specular_1):
+        SyncLightSpecular1(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].specular_1):
+        SyncLightSpecular1(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].specular_1):
+        SyncLightSpecular1(7);
+        break;
+
+    // Fragment lighting diffuse color
+    case PICA_REG_INDEX(lighting.light[0].diffuse):
+        SyncLightDiffuse(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].diffuse):
+        SyncLightDiffuse(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].diffuse):
+        SyncLightDiffuse(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].diffuse):
+        SyncLightDiffuse(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].diffuse):
+        SyncLightDiffuse(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].diffuse):
+        SyncLightDiffuse(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].diffuse):
+        SyncLightDiffuse(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].diffuse):
+        SyncLightDiffuse(7);
+        break;
+
+    // Fragment lighting ambient color
+    case PICA_REG_INDEX(lighting.light[0].ambient):
+        SyncLightAmbient(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].ambient):
+        SyncLightAmbient(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].ambient):
+        SyncLightAmbient(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].ambient):
+        SyncLightAmbient(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].ambient):
+        SyncLightAmbient(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].ambient):
+        SyncLightAmbient(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].ambient):
+        SyncLightAmbient(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].ambient):
+        SyncLightAmbient(7);
+        break;
+
+    // Fragment lighting position
+    case PICA_REG_INDEX(lighting.light[0].x):
+    case PICA_REG_INDEX(lighting.light[0].z):
+        SyncLightPosition(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].x):
+    case PICA_REG_INDEX(lighting.light[1].z):
+        SyncLightPosition(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].x):
+    case PICA_REG_INDEX(lighting.light[2].z):
+        SyncLightPosition(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].x):
+    case PICA_REG_INDEX(lighting.light[3].z):
+        SyncLightPosition(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].x):
+    case PICA_REG_INDEX(lighting.light[4].z):
+        SyncLightPosition(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].x):
+    case PICA_REG_INDEX(lighting.light[5].z):
+        SyncLightPosition(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].x):
+    case PICA_REG_INDEX(lighting.light[6].z):
+        SyncLightPosition(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].x):
+    case PICA_REG_INDEX(lighting.light[7].z):
+        SyncLightPosition(7);
+        break;
+
+    // Fragment spot lighting direction
+    case PICA_REG_INDEX(lighting.light[0].spot_x):
+    case PICA_REG_INDEX(lighting.light[0].spot_z):
+        SyncLightSpotDirection(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].spot_x):
+    case PICA_REG_INDEX(lighting.light[1].spot_z):
+        SyncLightSpotDirection(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].spot_x):
+    case PICA_REG_INDEX(lighting.light[2].spot_z):
+        SyncLightSpotDirection(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].spot_x):
+    case PICA_REG_INDEX(lighting.light[3].spot_z):
+        SyncLightSpotDirection(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].spot_x):
+    case PICA_REG_INDEX(lighting.light[4].spot_z):
+        SyncLightSpotDirection(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].spot_x):
+    case PICA_REG_INDEX(lighting.light[5].spot_z):
+        SyncLightSpotDirection(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].spot_x):
+    case PICA_REG_INDEX(lighting.light[6].spot_z):
+        SyncLightSpotDirection(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].spot_x):
+    case PICA_REG_INDEX(lighting.light[7].spot_z):
+        SyncLightSpotDirection(7);
+        break;
+
+    // Fragment lighting light source config
+    case PICA_REG_INDEX(lighting.light[0].config):
+    case PICA_REG_INDEX(lighting.light[1].config):
+    case PICA_REG_INDEX(lighting.light[2].config):
+    case PICA_REG_INDEX(lighting.light[3].config):
+    case PICA_REG_INDEX(lighting.light[4].config):
+    case PICA_REG_INDEX(lighting.light[5].config):
+    case PICA_REG_INDEX(lighting.light[6].config):
+    case PICA_REG_INDEX(lighting.light[7].config):
+        shader_dirty = true;
+        break;
+
+    // Fragment lighting distance attenuation bias
+    case PICA_REG_INDEX(lighting.light[0].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(7);
+        break;
+
+    // Fragment lighting distance attenuation scale
+    case PICA_REG_INDEX(lighting.light[0].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(7);
+        break;
+
+    // Fragment lighting global ambient color (emission + ambient * ambient)
+    case PICA_REG_INDEX(lighting.global_ambient):
+        SyncGlobalAmbient();
+        break;
+
+    // Fragment lighting lookup tables
+    case PICA_REG_INDEX(lighting.lut_data[0]):
+    case PICA_REG_INDEX(lighting.lut_data[1]):
+    case PICA_REG_INDEX(lighting.lut_data[2]):
+    case PICA_REG_INDEX(lighting.lut_data[3]):
+    case PICA_REG_INDEX(lighting.lut_data[4]):
+    case PICA_REG_INDEX(lighting.lut_data[5]):
+    case PICA_REG_INDEX(lighting.lut_data[6]):
+    case PICA_REG_INDEX(lighting.lut_data[7]): {
+        const auto& lut_config = regs.lighting.lut_config;
+        uniform_block_data.lighting_lut_dirty[lut_config.type] = true;
+        uniform_block_data.lighting_lut_dirty_any = true;
+        break;
+    }
+    }
+}
+
+void RasterizerOpenGL::FlushAll() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushAll();
+}
+
+void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+}
+
+void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.InvalidateRegion(addr, size, nullptr);
+}
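+// FlushRegion writes cached GPU-side data back so guest memory is up to date, while
+// InvalidateRegion drops cached copies so the next access re-reads guest memory;
+// flush-and-invalidate below does both, which is what a CPU write over a region the GPU has
+// also written requires.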
+
+void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+    res_cache.InvalidateRegion(addr, size, nullptr);
+}
+
+void RasterizerOpenGL::ClearAll(bool flush) {
+    res_cache.ClearAll(flush);
+}
+
+bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
+    MICROPROFILE_SCOPE(OpenGL_Blits);
+
+    SurfaceParams src_params;
+    src_params.addr = config.GetPhysicalInputAddress();
+    src_params.width = config.output_width;
+    src_params.stride = config.input_width;
+    src_params.height = config.output_height;
+    src_params.is_tiled = !config.input_linear;
+    src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format);
+    src_params.UpdateParams();
+
+    SurfaceParams dst_params;
+    dst_params.addr = config.GetPhysicalOutputAddress();
+    dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2
+                                                        : config.output_width.Value();
+    dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2
+                                                         : config.output_height.Value();
+    dst_params.is_tiled = config.input_linear != config.dont_swizzle;
+    dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format);
+    dst_params.UpdateParams();
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) =
+        res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
+    if (src_surface == nullptr)
+        return false;
+
+    dst_params.res_scale = src_surface->res_scale;
+
+    Common::Rectangle<u32> dst_rect;
+    Surface dst_surface;
+    std::tie(dst_surface, dst_rect) =
+        res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false);
+    if (dst_surface == nullptr)
+        return false;
+
+    if (src_surface->is_tiled != dst_surface->is_tiled)
+        std::swap(src_rect.top, src_rect.bottom);
+
+    if (config.flip_vertically)
+        std::swap(src_rect.top, src_rect.bottom);
+
+    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect))
+        return false;
+
+    res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
+    u32 copy_size = Common::AlignDown(config.texture_copy.size, 16);
+    if (copy_size == 0) {
+        return false;
+    }
+
+    u32 input_gap = config.texture_copy.input_gap * 16;
+    u32 input_width = config.texture_copy.input_width * 16;
+    if (input_width == 0 && input_gap != 0) {
+        return false;
+    }
+    if (input_gap == 0 || input_width >= copy_size) {
+        input_width = copy_size;
+        input_gap = 0;
+    }
+    if (copy_size % input_width != 0) {
+        return false;
+    }
+
+    u32 output_gap = config.texture_copy.output_gap * 16;
+    u32 output_width = config.texture_copy.output_width * 16;
+    if (output_width == 0 && output_gap != 0) {
+        return false;
+    }
+    if (output_gap == 0 || output_width >= copy_size) {
+        output_width = copy_size;
+        output_gap = 0;
+    }
+    if (copy_size % output_width != 0) {
+        return false;
+    }
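+    // TextureCopy widths and gaps are given in 16-byte units and converted to bytes above;
+    // e.g. input_gap = 2 means 32 bytes are skipped after every input_width-byte line, and a
+    // copy_size that is not a whole multiple of the line width is rejected rather than split
+    // mid-line.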
+
+    SurfaceParams src_params;
+    src_params.addr = config.GetPhysicalInputAddress();
+    src_params.stride = input_width + input_gap; // stride in bytes
+    src_params.width = input_width;              // width in bytes
+    src_params.height = copy_size / input_width;
+    src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width;
+    src_params.end = src_params.addr + src_params.size;
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params);
+    if (src_surface == nullptr) {
+        return false;
+    }
+
+    if (output_gap != 0 &&
+        (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) *
+                             (src_surface->is_tiled ? 8 : 1) ||
+         output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) {
+        return false;
+    }
+
+    SurfaceParams dst_params = *src_surface;
+    dst_params.addr = config.GetPhysicalOutputAddress();
+    dst_params.width = src_rect.GetWidth() / src_surface->res_scale;
+    dst_params.stride =
+        dst_params.width +
+        src_surface->PixelsInBytes(src_surface->is_tiled ? output_gap / 8 : output_gap);
+    dst_params.height = src_rect.GetHeight() / src_surface->res_scale;
+    dst_params.res_scale = src_surface->res_scale;
+    dst_params.UpdateParams();
+
+    // Since we are going to invalidate the gap if there is one, we will have to load it first
+    const bool load_gap = output_gap != 0;
+    Common::Rectangle<u32> dst_rect;
+    Surface dst_surface;
+    std::tie(dst_surface, dst_rect) =
+        res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap);
+    if (dst_surface == nullptr) {
+        return false;
+    }
+
+    if (dst_surface->type == SurfaceType::Texture) {
+        return false;
+    }
+
+    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
+        return false;
+    }
+
+    res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
+    Surface dst_surface = res_cache.GetFillSurface(config);
+    if (dst_surface == nullptr)
+        return false;
+
+    res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
+                                         PAddr framebuffer_addr, u32 pixel_stride,
+                                         ScreenInfo& screen_info) {
+    if (framebuffer_addr == 0) {
+        return false;
+    }
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+
+    SurfaceParams src_params;
+    src_params.addr = framebuffer_addr;
+    src_params.width = std::min(config.width.Value(), pixel_stride);
+    src_params.height = config.height;
+    src_params.stride = pixel_stride;
+    src_params.is_tiled = false;
+    src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format);
+    src_params.UpdateParams();
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) =
+        res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
+
+    if (src_surface == nullptr) {
+        return false;
+    }
+
+    u32 scaled_width = src_surface->GetScaledWidth();
+    u32 scaled_height = src_surface->GetScaledHeight();
+
+    screen_info.display_texcoords = Common::Rectangle<float>(
+        (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
+        (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
+
+    screen_info.display_texture = src_surface->texture.handle;
+
+    return true;
+}
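+// Note the texcoord ordering above: 3DS framebuffers are stored rotated by 90 degrees, so the
+// sub-rect's bottom/left/top/right fill the texcoord rectangle's left/top/right/bottom slots.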
+
+void RasterizerOpenGL::SamplerInfo::Create() {
+    sampler.Create();
+    mag_filter = min_filter = mip_filter = TextureConfig::Linear;
+    wrap_s = wrap_t = TextureConfig::Repeat;
+    border_color = 0;
+    lod_min = lod_max = 0;
+    lod_bias = 0;
+
+    // default is 1000 and -1000
+    // Other attributes have correct defaults
+    glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
+    glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
+}
+
+void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
+    const Pica::TexturingRegs::TextureConfig& config) {
+
+    GLuint s = sampler.handle;
+
+    if (mag_filter != config.mag_filter) {
+        mag_filter = config.mag_filter;
+        glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureMagFilterMode(mag_filter));
+    }
+
+    // TODO(wwylele): remove new_supress_mipmap_for_cube logic once mipmap for cube is implemented
+    bool new_supress_mipmap_for_cube =
+        config.type == Pica::TexturingRegs::TextureConfig::TextureCube;
+    if (min_filter != config.min_filter || mip_filter != config.mip_filter ||
+        supress_mipmap_for_cube != new_supress_mipmap_for_cube) {
+        min_filter = config.min_filter;
+        mip_filter = config.mip_filter;
+        supress_mipmap_for_cube = new_supress_mipmap_for_cube;
+        if (new_supress_mipmap_for_cube) {
+            // HACK: use mag filter converter for min filter because they are the same anyway
+            glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+                                PicaToGL::TextureMagFilterMode(min_filter));
+        } else {
+            glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+                                PicaToGL::TextureMinFilterMode(min_filter, mip_filter));
+        }
+    }
+
+    if (wrap_s != config.wrap_s) {
+        wrap_s = config.wrap_s;
+        glSamplerParameteri(s, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s));
+    }
+    if (wrap_t != config.wrap_t) {
+        wrap_t = config.wrap_t;
+        glSamplerParameteri(s, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t));
+    }
+
+    if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
+        if (border_color != config.border_color.raw) {
+            border_color = config.border_color.raw;
+            auto gl_color = PicaToGL::ColorRGBA8(border_color);
+            glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
+        }
+    }
+
+    if (lod_min != config.lod.min_level) {
+        lod_min = config.lod.min_level;
+        glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
+    }
+
+    if (lod_max != config.lod.max_level) {
+        lod_max = config.lod.max_level;
+        glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
+    }
+
+    if (!GLES && lod_bias != config.lod.bias) {
+        lod_bias = config.lod.bias;
+        glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias / 256.0f);
+    }
+}
+
+void RasterizerOpenGL::SetShader() {
+    shader_program_manager->UseFragmentShader(Pica::g_state.regs);
+}
+
+void RasterizerOpenGL::SyncClipEnabled() {
+    state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0;
+}
+
+void RasterizerOpenGL::SyncClipCoef() {
+    const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef();
+    const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),
+                                  raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()};
+    if (new_clip_coef != uniform_block_data.data.clip_coef) {
+        uniform_block_data.data.clip_coef = new_clip_coef;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncCullMode() {
+    const auto& regs = Pica::g_state.regs;
+
+    switch (regs.rasterizer.cull_mode) {
+    case Pica::RasterizerRegs::CullMode::KeepAll:
+        state.cull.enabled = false;
+        break;
+
+    case Pica::RasterizerRegs::CullMode::KeepClockWise:
+        state.cull.enabled = true;
+        state.cull.front_face = GL_CW;
+        break;
+
+    case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
+        state.cull.enabled = true;
+        state.cull.front_face = GL_CCW;
+        break;
+
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}",
+                     static_cast<u32>(regs.rasterizer.cull_mode.Value()));
+        UNIMPLEMENTED();
+        break;
+    }
+}
+
+void RasterizerOpenGL::SyncDepthScale() {
+    float depth_scale =
+        Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
+    if (depth_scale != uniform_block_data.data.depth_scale) {
+        uniform_block_data.data.depth_scale = depth_scale;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncDepthOffset() {
+    float depth_offset =
+        Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane)
+            .ToFloat32();
+    if (depth_offset != uniform_block_data.data.depth_offset) {
+        uniform_block_data.data.depth_offset = depth_offset;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncBlendEnabled() {
+    state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
+}
+
+void RasterizerOpenGL::SyncBlendFuncs() {
+    const auto& regs = Pica::g_state.regs;
+    state.blend.rgb_equation =
+        PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
+    state.blend.a_equation =
+        PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
+    state.blend.src_rgb_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
+    state.blend.dst_rgb_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
+    state.blend.src_a_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a);
+    state.blend.dst_a_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
+}
+
+void RasterizerOpenGL::SyncBlendColor() {
+    auto blend_color =
+        PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
+    state.blend.color.red = blend_color[0];
+    state.blend.color.green = blend_color[1];
+    state.blend.color.blue = blend_color[2];
+    state.blend.color.alpha = blend_color[3];
+}
+
+void RasterizerOpenGL::SyncFogColor() {
+    const auto& regs = Pica::g_state.regs;
+    uniform_block_data.data.fog_color = {
+        regs.texturing.fog_color.r.Value() / 255.0f,
+        regs.texturing.fog_color.g.Value() / 255.0f,
+        regs.texturing.fog_color.b.Value() / 255.0f,
+    };
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncProcTexNoise() {
+    const auto& regs = Pica::g_state.regs.texturing;
+    uniform_block_data.data.proctex_noise_f = {
+        Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
+        Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
+    };
+    uniform_block_data.data.proctex_noise_a = {
+        regs.proctex_noise_u.amplitude / 4095.0f,
+        regs.proctex_noise_v.amplitude / 4095.0f,
+    };
+    uniform_block_data.data.proctex_noise_p = {
+        Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
+        Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
+    };
+
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncProcTexBias() {
+    const auto& regs = Pica::g_state.regs.texturing;
+    // The 16-bit bias is split across two registers: the low byte lives in proctex.bias_low and
+    // the high byte in proctex_lut.bias_high, so the float16 is reassembled here
+    uniform_block_data.data.proctex_bias =
+        Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
+            .ToFloat32();
+
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncAlphaTest() {
+    const auto& regs = Pica::g_state.regs;
+    if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
+        uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
+        uniform_block_data.dirty = true;
+    }
+}
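+// OpenGL ES has no glLogicOp, so the NoOp case (which on the PICA disables color output
+// entirely) is approximated by masking all color writes; SyncColorWriteMask() below checks the
+// same condition so it does not re-enable the channels.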
We use color write mask to skip + // color but allow depth write. + state.color_mask = {}; + } + } + } +} + +void RasterizerOpenGL::SyncColorWriteMask() { + const auto& regs = Pica::g_state.regs; + if (GLES) { + if (!regs.framebuffer.output_merger.alphablend_enable) { + if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. Return early to avoid overwriting this. + return; + } + } + } + + auto IsColorWriteEnabled = [&](u32 value) { + return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE + : GL_FALSE; + }; + + state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable); + state.color_mask.green_enabled = + IsColorWriteEnabled(regs.framebuffer.output_merger.green_enable); + state.color_mask.blue_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.blue_enable); + state.color_mask.alpha_enabled = + IsColorWriteEnabled(regs.framebuffer.output_merger.alpha_enable); +} + +void RasterizerOpenGL::SyncStencilWriteMask() { + const auto& regs = Pica::g_state.regs; + state.stencil.write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; +} + +void RasterizerOpenGL::SyncDepthWriteMask() { + const auto& regs = Pica::g_state.regs; + state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable) + ? GL_TRUE + : GL_FALSE; +} + +void RasterizerOpenGL::SyncStencilTest() { + const auto& regs = Pica::g_state.regs; + state.stencil.test_enabled = + regs.framebuffer.output_merger.stencil_test.enable && + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + state.stencil.test_func = + PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); + state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value; + state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask; + state.stencil.action_stencil_fail = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); + state.stencil.action_depth_pass = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); +} + +void RasterizerOpenGL::SyncDepthTest() { + const auto& regs = Pica::g_state.regs; + state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + state.depth.test_func = + regs.framebuffer.output_merger.depth_test_enable == 1 + ? 
PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func) + : GL_ALWAYS; +} + +void RasterizerOpenGL::SyncCombinerColor() { + auto combiner_color = + PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); + if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { + uniform_block_data.data.tev_combiner_buffer_color = combiner_color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index, + const Pica::TexturingRegs::TevStageConfig& tev_stage) { + const auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); + + if (const_color == uniform_block_data.data.const_color[stage_index]) { + return; + } + + uniform_block_data.data.const_color[stage_index] = const_color; + uniform_block_data.dirty = true; +} + +void RasterizerOpenGL::SyncGlobalAmbient() { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular0(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular1(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDiffuse(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightAmbient(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightPosition(int light_index) { + GLvec3 position = { + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()}; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpotDirection(int light_index) { + const auto& light = Pica::g_state.regs.lighting.light[light_index]; + GLvec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f, + light.spot_z / 2047.0f}; + + if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { + uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int 
light_index) {
+    GLfloat dist_atten_bias =
+        Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
+            .ToFloat32();
+
+    if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
+        uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
+    GLfloat dist_atten_scale =
+        Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
+            .ToFloat32();
+
+    if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
+        uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncShadowBias() {
+    const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
+    GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
+    GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
+
+    if (constant != uniform_block_data.data.shadow_bias_constant ||
+        linear != uniform_block_data.data.shadow_bias_linear) {
+        uniform_block_data.data.shadow_bias_constant = constant;
+        uniform_block_data.data.shadow_bias_linear = linear;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncShadowTextureBias() {
+    GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
+    if (bias != uniform_block_data.data.shadow_texture_bias) {
+        uniform_block_data.data.shadow_texture_bias = bias;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncAndUploadLUTsLF() {
+    constexpr std::size_t max_size =
+        sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog
+
+    if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
+        return;
+    }
+
+    u8* buffer;
+    GLintptr offset;
+    bool invalidate;
+    std::size_t bytes_used = 0;
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4));
+
+    // Sync the lighting luts
+    if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
+        for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) {
+            if (uniform_block_data.lighting_lut_dirty[index] || invalidate) {
+                std::array<GLvec2, 256> new_data;
+                const auto& source_lut = Pica::g_state.lighting.luts[index];
+                std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
+                               [](const auto& entry) {
+                                   return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+                               });
+
+                if (new_data != lighting_lut_data[index] || invalidate) {
+                    lighting_lut_data[index] = new_data;
+                    std::memcpy(buffer + bytes_used, new_data.data(),
+                                new_data.size() * sizeof(GLvec2));
+                    uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
+                        static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+                    uniform_block_data.dirty = true;
+                    bytes_used += new_data.size() * sizeof(GLvec2);
+                }
+                uniform_block_data.lighting_lut_dirty[index] = false;
+            }
+        }
+        uniform_block_data.lighting_lut_dirty_any = false;
+    }
+
+    // Sync the fog lut
+    if (uniform_block_data.fog_lut_dirty || invalidate) {
+        std::array<GLvec2, 128> new_data;
+
+        std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(),
+                       new_data.begin(), [](const auto& entry) {
+                           return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+                       });
+
+        if (new_data != fog_lut_data || invalidate) {
+            fog_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
+            uniform_block_data.data.fog_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec2);
+        }
+        uniform_block_data.fog_lut_dirty = false;
+    }
+
+    texture_lf_buffer.Unmap(bytes_used);
+}
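Both SyncAndUpload* functions depend on the stream-buffer mapping idiom: Map() hands back a write pointer into the buffer, the byte offset of the mapped region, and an invalidate flag signalling that the buffer wrapped around, in which case every previously recorded LUT offset is stale and each block re-uploads. A usage sketch of the contract assumed of OGLStreamBuffer here (UploadBlob and the alignment of 4 are illustrative):

    // Sketch of the Map/Unmap contract the code above relies on.
    GLintptr UploadBlob(OGLStreamBuffer& stream, const void* src, std::size_t size) {
        const auto [buffer, offset, invalidate] = stream.Map(static_cast<GLsizeiptr>(size), 4);
        if (invalidate) {
            // The buffer wrapped: offsets recorded from earlier uploads are stale.
        }
        std::memcpy(buffer, src, size);              // write into the mapped region
        stream.Unmap(static_cast<GLsizeiptr>(size)); // publish only the bytes written
        return offset; // callers store this so shaders can index the data later
    }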
+
+void RasterizerOpenGL::SyncAndUploadLUTs() {
+    constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
+                                     sizeof(GLvec4) * 256 +     // proctex
+                                     sizeof(GLvec4) * 256;      // proctex diff
+
+    if (!uniform_block_data.proctex_noise_lut_dirty &&
+        !uniform_block_data.proctex_color_map_dirty &&
+        !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
+        !uniform_block_data.proctex_diff_lut_dirty) {
+        return;
+    }
+
+    u8* buffer;
+    GLintptr offset;
+    bool invalidate;
+    std::size_t bytes_used = 0;
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
+
+    // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
+    auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
+                                   const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
+                                   std::array<GLvec2, 128>& lut_data, GLint& lut_offset) {
+        std::array<GLvec2, 128> new_data;
+        std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
+            return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+        });
+
+        if (new_data != lut_data || invalidate) {
+            lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
+            lut_offset = static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec2);
+        }
+    };
+
+    // Sync the proctex noise lut
+    if (uniform_block_data.proctex_noise_lut_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
+                            uniform_block_data.data.proctex_noise_lut_offset);
+        uniform_block_data.proctex_noise_lut_dirty = false;
+    }
+
+    // Sync the proctex color map
+    if (uniform_block_data.proctex_color_map_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
+                            uniform_block_data.data.proctex_color_map_offset);
+        uniform_block_data.proctex_color_map_dirty = false;
+    }
+
+    // Sync the proctex alpha map
+    if (uniform_block_data.proctex_alpha_map_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
+                            uniform_block_data.data.proctex_alpha_map_offset);
+        uniform_block_data.proctex_alpha_map_dirty = false;
+    }
+
+    // Sync the proctex lut
+    if (uniform_block_data.proctex_lut_dirty || invalidate) {
+        std::array<GLvec4, 256> new_data;
+
+        std::transform(Pica::g_state.proctex.color_table.begin(),
+                       Pica::g_state.proctex.color_table.end(), new_data.begin(),
+                       [](const auto& entry) {
+                           auto rgba = entry.ToVector() / 255.0f;
+                           return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
+                       });
+
+        if (new_data != proctex_lut_data || invalidate) {
+            proctex_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
+            uniform_block_data.data.proctex_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec4));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec4);
+        }
+        uniform_block_data.proctex_lut_dirty = false;
+    }
+
+    // Sync the proctex difference lut
+    if (uniform_block_data.proctex_diff_lut_dirty || invalidate) {
+        std::array<GLvec4, 256> new_data;
+
+        std::transform(Pica::g_state.proctex.color_diff_table.begin(),
+                       Pica::g_state.proctex.color_diff_table.end(), new_data.begin(),
+                       [](const auto& entry) {
+                           auto rgba = entry.ToVector() / 255.0f;
+                           return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
+                       });
+
+        if (new_data != proctex_diff_lut_data || invalidate) {
+            proctex_diff_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
+            uniform_block_data.data.proctex_diff_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec4));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec4);
+        }
+        uniform_block_data.proctex_diff_lut_dirty = false;
+    }
+
+    texture_buffer.Unmap(bytes_used);
+}
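Each LUT keeps a CPU-side mirror (proctex_lut_data and friends) precisely so that a dirty flag from the guest does not automatically mean a GPU upload: the freshly converted table is compared against the mirror and uploaded only when the contents differ or the backing buffer was invalidated. The same logic as the blocks above, reduced to a standalone sketch (UploadIfChanged is illustrative, not part of this patch):

    // Illustrative reduction of the per-LUT dedup used above: a guest-side dirty
    // flag does not force an upload unless the converted values actually changed.
    template <typename Entry, std::size_t N>
    static bool UploadIfChanged(const std::array<Entry, N>& fresh,
                                std::array<Entry, N>& mirror, u8* dst, bool invalidate) {
        if (fresh == mirror && !invalidate) {
            return false; // guest rewrote the LUT with identical contents
        }
        mirror = fresh;
        std::memcpy(dst, fresh.data(), sizeof(fresh));
        return true;
    }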
+
+void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
+    // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
+    // first
+    state.draw.uniform_buffer = uniform_buffer.GetHandle();
+    state.Apply();
+
+    bool sync_vs = accelerate_draw;
+    bool sync_fs = uniform_block_data.dirty;
+
+    if (!sync_vs && !sync_fs)
+        return;
+
+    std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
+    std::size_t used_bytes = 0;
+    u8* uniforms;
+    GLintptr offset;
+    bool invalidate;
+    std::tie(uniforms, offset, invalidate) =
+        uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
+
+    if (sync_vs) {
+        VSUniformData vs_uniforms;
+        vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
+        std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
+        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS),
+                          uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData));
+        used_bytes += uniform_size_aligned_vs;
+    }
+
+    if (sync_fs || invalidate) {
+        std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData));
+        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common),
+                          uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData));
+        uniform_block_data.dirty = false;
+        used_bytes += uniform_size_aligned_fs;
+    }
+
+    uniform_buffer.Unmap(used_bytes);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
new file mode 100644
index 000000000..9c11dca6e
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -0,0 +1,338 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
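The header below mirrors the OpenGL rasterizer's interface almost one-to-one; the genuinely Vulkan-specific piece is HardwareVertex, whose static binding_desc/attribute_desc are meant to be handed directly to pipeline creation. Roughly, assuming the Vulkan-Hpp constructor order (flags, binding count/pointer, attribute count/pointer):

    // Sketch: consuming HardwareVertex's static vertex layout at pipeline build time.
    const vk::PipelineVertexInputStateCreateInfo vertex_input_info{
        {},                               // flags
        1, &HardwareVertex::binding_desc, // one binding of stride sizeof(VertexInfo)
        static_cast<u32>(HardwareVertex::attribute_desc.size()),
        HardwareVertex::attribute_desc.data()};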
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/vector_math.h" +#include "core/hw/gpu.h" +#include "video_core/pica_state.h" +#include "video_core/pica_types.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_lighting.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/regs_texturing.h" +#include "video_core/shader/shader.h" + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { +class ShaderProgramManager; + +class RasterizerVulkan : public VideoCore::RasterizerInterface { +public: + explicit RasterizerVulkan(Frontend::EmuWindow& emu_window); + ~RasterizerVulkan() override; + + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + + void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; + void DrawTriangles() override; + void NotifyPicaRegisterChanged(u32 id) override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + void ClearAll(bool flush) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, OpenGL::ScreenInfo& screen_info) override; + bool AccelerateDrawBatch(bool is_indexed) override; + + /// Syncs entire status to match PICA registers + void SyncEntireState() override; + +private: + struct SamplerInfo { + using TextureConfig = Pica::TexturingRegs::TextureConfig; + + OGLSampler sampler; + + /// Creates the sampler object, initializing its state so that it's in sync with the + /// SamplerInfo struct. + void Create(); + /// Syncs the sampler object with the config, updating any necessary state. 
+ void SyncWithConfig(const TextureConfig& config); + + private: + TextureConfig::TextureFilter mag_filter; + TextureConfig::TextureFilter min_filter; + TextureConfig::TextureFilter mip_filter; + TextureConfig::WrapMode wrap_s; + TextureConfig::WrapMode wrap_t; + u32 border_color; + u32 lod_min; + u32 lod_max; + s32 lod_bias; + + // TODO(wwylele): remove this once mipmap for cube is implemented + bool supress_mipmap_for_cube = false; + }; + + struct VertexInfo + { + VertexInfo() = default; + VertexInfo(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { + position[0] = v.pos.x.ToFloat32(); + position[1] = v.pos.y.ToFloat32(); + position[2] = v.pos.z.ToFloat32(); + position[3] = v.pos.w.ToFloat32(); + color[0] = v.color.x.ToFloat32(); + color[1] = v.color.y.ToFloat32(); + color[2] = v.color.z.ToFloat32(); + color[3] = v.color.w.ToFloat32(); + tex_coord0[0] = v.tc0.x.ToFloat32(); + tex_coord0[1] = v.tc0.y.ToFloat32(); + tex_coord1[0] = v.tc1.x.ToFloat32(); + tex_coord1[1] = v.tc1.y.ToFloat32(); + tex_coord2[0] = v.tc2.x.ToFloat32(); + tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + normquat = -normquat; + } + } + + glm::vec4 position; + glm::vec4 color; + glm::vec2 tex_coord0; + glm::vec2 tex_coord1; + glm::vec2 tex_coord2; + float tex_coord0_w; + glm::vec4 normquat; + glm::vec3 view; + }; + + /// Structure that the hardware rendered vertices are composed of + struct HardwareVertex : public VertexInfo + { + HardwareVertex() = default; + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexInfo(v, flip_quaternion) {}; + static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo)); + static constexpr std::array attribute_desc = + { + vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, position)), + vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, color)), + vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord0)), + vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord1)), + vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord2)), + vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexInfo, tex_coord0_w)), + vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, normquat)), + vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, view)), + }; + }; + + + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA register + void SyncClipCoef(); + + /// Sets the OpenGL shader in accordance with the current PICA register state + void SetShader(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the depth scale to match the PICA register + void SyncDepthScale(); + + /// Syncs the depth offset to match the PICA register + void SyncDepthOffset(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to 
match the PICA register
+    void SyncBlendFuncs();
+
+    /// Syncs the blend color to match the PICA register
+    void SyncBlendColor();
+
+    /// Syncs the fog states to match the PICA register
+    void SyncFogColor();
+
+    /// Sync the procedural texture noise configuration to match the PICA register
+    void SyncProcTexNoise();
+
+    /// Sync the procedural texture bias configuration to match the PICA register
+    void SyncProcTexBias();
+
+    /// Syncs the alpha test states to match the PICA register
+    void SyncAlphaTest();
+
+    /// Syncs the logic op states to match the PICA register
+    void SyncLogicOp();
+
+    /// Syncs the color write mask to match the PICA register state
+    void SyncColorWriteMask();
+
+    /// Syncs the stencil write mask to match the PICA register state
+    void SyncStencilWriteMask();
+
+    /// Syncs the depth write mask to match the PICA register state
+    void SyncDepthWriteMask();
+
+    /// Syncs the stencil test states to match the PICA register
+    void SyncStencilTest();
+
+    /// Syncs the depth test states to match the PICA register
+    void SyncDepthTest();
+
+    /// Syncs the TEV combiner color buffer to match the PICA register
+    void SyncCombinerColor();
+
+    /// Syncs the TEV constant color to match the PICA register
+    void SyncTevConstColor(std::size_t tev_index,
+                           const Pica::TexturingRegs::TevStageConfig& tev_stage);
+
+    /// Syncs the lighting global ambient color to match the PICA register
+    void SyncGlobalAmbient();
+
+    /// Syncs the specified light's specular 0 color to match the PICA register
+    void SyncLightSpecular0(int light_index);
+
+    /// Syncs the specified light's specular 1 color to match the PICA register
+    void SyncLightSpecular1(int light_index);
+
+    /// Syncs the specified light's diffuse color to match the PICA register
+    void SyncLightDiffuse(int light_index);
+
+    /// Syncs the specified light's ambient color to match the PICA register
+    void SyncLightAmbient(int light_index);
+
+    /// Syncs the specified light's position to match the PICA register
+    void SyncLightPosition(int light_index);
+
+    /// Syncs the specified spot light direction to match the PICA register
+    void SyncLightSpotDirection(int light_index);
+
+    /// Syncs the specified light's distance attenuation bias to match the PICA register
+    void SyncLightDistanceAttenuationBias(int light_index);
+
+    /// Syncs the specified light's distance attenuation scale to match the PICA register
+    void SyncLightDistanceAttenuationScale(int light_index);
+
+    /// Syncs the shadow rendering bias to match the PICA register
+    void SyncShadowBias();
+
+    /// Syncs the shadow texture bias to match the PICA register
+    void SyncShadowTextureBias();
+
+    /// Syncs and uploads the lighting, fog and proctex LUTs
+    void SyncAndUploadLUTs();
+    void SyncAndUploadLUTsLF();
+
+    /// Upload the uniform blocks to the uniform buffer object
+    void UploadUniforms(bool accelerate_draw);
+
+    /// Generic draw function for DrawTriangles and AccelerateDrawBatch
+    bool Draw(bool accelerate, bool is_indexed);
+
+    /// Internal implementation for AccelerateDrawBatch
+    bool AccelerateDrawBatchInternal(bool is_indexed);
+
+    struct VertexArrayInfo {
+        u32 vs_input_index_min;
+        u32 vs_input_index_max;
+        u32 vs_input_size;
+    };
+
+    /// Retrieve the range and the size of the input vertex
+    VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
+
+    /// Setup vertex shader for AccelerateDrawBatch
+    bool SetupVertexShader();
+
+    /// Setup geometry shader for AccelerateDrawBatch
+    bool SetupGeometryShader();
+
+    bool is_amd;
+
+    OpenGLState state;
+    GLuint default_texture;
+
+    RasterizerCacheOpenGL res_cache;
+
+    std::vector<HardwareVertex> vertex_batch;
+
+    bool shader_dirty = true;
+
+    struct {
+        UniformData data;
+        std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
+        bool lighting_lut_dirty_any;
+        bool fog_lut_dirty;
+        bool proctex_noise_lut_dirty;
+        bool proctex_color_map_dirty;
+        bool proctex_alpha_map_dirty;
+        bool proctex_lut_dirty;
+        bool proctex_diff_lut_dirty;
+        bool dirty;
+    } uniform_block_data = {};
+
+    std::unique_ptr<ShaderProgramManager> shader_program_manager;
+
+    // They shall be big enough for about one frame.
+    static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
+    static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
+    static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
+    static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
+
+    OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
+    std::array<bool, 16> hw_vao_enabled_attributes{};
+
+    std::array<SamplerInfo, 3> texture_samplers;
+    OGLStreamBuffer vertex_buffer;
+    OGLStreamBuffer uniform_buffer;
+    OGLStreamBuffer index_buffer;
+    OGLStreamBuffer texture_buffer;
+    OGLStreamBuffer texture_lf_buffer;
+    OGLFramebuffer framebuffer;
+    GLint uniform_buffer_alignment;
+    std::size_t uniform_size_aligned_vs;
+    std::size_t uniform_size_aligned_fs;
+
+    SamplerInfo texture_cube_sampler;
+
+    OGLTexture texture_buffer_lut_lf;
+    OGLTexture texture_buffer_lut_rg;
+    OGLTexture texture_buffer_lut_rgba;
+
+    std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
+    std::array<GLvec2, 128> fog_lut_data{};
+    std::array<GLvec2, 128> proctex_noise_lut_data{};
+    std::array<GLvec2, 128> proctex_color_map_data{};
+    std::array<GLvec2, 128> proctex_alpha_map_data{};
+    std::array<GLvec4, 256> proctex_lut_data{};
+    std::array<GLvec4, 256> proctex_diff_lut_data{};
+
+    bool allow_shadow;
+};
+
+} // namespace Vulkan
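One recurring pattern in the cache implementation that follows: surface textures are never destroyed outright. CachedSurface's destructor parks them in a multimap keyed by format and dimensions, and AllocateSurfaceTexture checks that pool before creating anything. The shape of the pattern, assuming HostTextureTag hashes and compares on exactly those fields (CreateTexture is a hypothetical stand-in for the real allocation path):

    // Recycle-or-create, as used by the rasterizer cache below for surface textures.
    VKTexture GetOrCreate(std::unordered_multimap<HostTextureTag, VKTexture>& recycler,
                          const HostTextureTag& tag) {
        if (auto it = recycler.find(tag); it != recycler.end()) {
            VKTexture texture = std::move(it->second); // reuse a retired texture
            recycler.erase(it);
            return texture;
        }
        return CreateTexture(tag); // hypothetical: allocate a fresh one from the tag
    }

This keeps image creation and memory allocation churn off the hot path when games repeatedly destroy and recreate same-sized render targets.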
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
new file mode 100644
index 000000000..81145ae59
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
@@ -0,0 +1,1875 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
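The copy routines in this file translate between the 3DS's tiled memory layout and linear staging data: surfaces are stored as 8x8-pixel tiles, and pixels inside a tile are ordered along a Morton (Z-order) curve, which is what VideoCore::MortonInterleave resolves. For reference, the interleave is plain bit-splicing of the 3-bit intra-tile coordinates (a sketch, not the implementation used here):

    // Reference 8x8 Morton (Z-order) index: bits are y2 x2 y1 x1 y0 x0.
    static u32 MortonInterleave8x8(u32 x, u32 y) {
        u32 i = (x & 1) | ((y & 1) << 1);
        i |= ((x & 2) << 1) | ((y & 2) << 2);
        i |= ((x & 4) << 2) | ((y & 4) << 3);
        return i;
    }

Multiplying the result by the format's bytes-per-pixel, as MortonCopyTile does below, yields the byte offset of pixel (x, y) inside one 64-pixel tile.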
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/alignment.h"
+#include "common/bit_field.h"
+#include "common/color.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/texture.h"
+#include "common/vector_math.h"
+#include "core/core.h"
+#include "core/frontend/emu_window.h"
+#include "core/hle/kernel/process.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "video_core/pica_state.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
+#include "video_core/renderer_vulkan/vk_state.h"
+#include "video_core/utils.h"
+#include "video_core/video_core.h"
+
+namespace Vulkan {
+
+using SurfaceType = SurfaceParams::SurfaceType;
+using PixelFormat = SurfaceParams::PixelFormat;
+
+const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+    const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
+    if (type == SurfaceType::Color) {
+        ASSERT(static_cast<std::size_t>(pixel_format) < fb_format_tuples.size());
+        return fb_format_tuples[static_cast<std::size_t>(pixel_format)];
+    } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
+        std::size_t tuple_idx = static_cast<std::size_t>(pixel_format) - 14;
+        ASSERT(tuple_idx < depth_format_tuples.size());
+        return depth_format_tuples[tuple_idx];
+    }
+    return tex_tuple;
+}
+
+template <typename Map, typename Interval>
+static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gpu_buffer) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+    constexpr u32 vk_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    for (u32 y = 0; y < 8; ++y) {
+        for (u32 x = 0; x < 8; ++x) {
+            u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
+            u8* gpu_ptr = gpu_buffer + ((7 - y) * stride + x) * vk_bytes_per_pixel;
+            if constexpr (morton_to_gl) {
+                if constexpr (format == PixelFormat::D24S8) {
+                    gpu_ptr[0] = tile_ptr[3];
+                    std::memcpy(gpu_ptr + 1, tile_ptr, 3);
+                } else {
+                    std::memcpy(gpu_ptr, tile_ptr, bytes_per_pixel);
+                }
+            } else {
+                if constexpr (format == PixelFormat::D24S8) {
+                    std::memcpy(tile_ptr, gpu_ptr + 1, 3);
+                    tile_ptr[3] = gpu_ptr[0];
+                } else {
+                    std::memcpy(tile_ptr, gpu_ptr, bytes_per_pixel);
+                }
+            }
+        }
+    }
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopy(u32 stride, u32 height, u8* gpu_buffer, PAddr base, PAddr start, PAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+    constexpr u32 tile_size = bytes_per_pixel * 64;
+
+    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    static_assert(gl_bytes_per_pixel >= bytes_per_pixel, "");
+    gpu_buffer += gl_bytes_per_pixel - bytes_per_pixel;
+
+    const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
+    const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
+    const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
+
+    ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));
+
+    const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
+    u32 x = (begin_pixel_index % (stride * 8)) / 8;
+    u32 y = (begin_pixel_index / (stride * 8)) * 8;
+
+    gpu_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel;
+
+    auto gpubuf_next_tile = [&] {
+        x = (x + 8) % stride;
+        gpu_buffer += 8 * gl_bytes_per_pixel;
+        if (!x) {
+            y += 8;
+            gpu_buffer -= stride * 9 * gl_bytes_per_pixel;
+        }
+    };
+
+    u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
+
+    if (start < aligned_start && !morton_to_gl) {
+        std::array<u8, tile_size> tmp_buf;
+        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gpu_buffer);
+        std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
+                    std::min(aligned_start, end) - start);
+
+        tile_buffer += aligned_start - start;
+        gpubuf_next_tile();
+    }
+
+    const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
+    PAddr current_paddr = aligned_start;
+    while (tile_buffer < buffer_end) {
+        // Pokemon Super Mystery Dungeon will try to use textures that go beyond
+        // the end address of VRAM. Stop reading if reaches invalid address
+        if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
+            !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
+            LOG_ERROR(Render_Vulkan, "Out of bound texture");
+            break;
+        }
+        MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gpu_buffer);
+        tile_buffer += tile_size;
+        current_paddr += tile_size;
+        gpubuf_next_tile();
+    }
+
+    if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
+        std::array<u8, tile_size> tmp_buf;
+        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gpu_buffer);
+        std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
+    }
+}
+
+static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = {
+    MortonCopy<true, PixelFormat::RGBA8>,  // 0
+    MortonCopy<true, PixelFormat::RGB8>,   // 1
+    MortonCopy<true, PixelFormat::RGB5A1>, // 2
+    MortonCopy<true, PixelFormat::RGB565>, // 3
+    MortonCopy<true, PixelFormat::RGBA4>,  // 4
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,                             // 5 - 13
+    MortonCopy<true, PixelFormat::D16>,  // 14
+    nullptr,                             // 15
+    MortonCopy<true, PixelFormat::D24>,  // 16
+    MortonCopy<true, PixelFormat::D24S8> // 17
+};
+
+static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = {
+    MortonCopy<false, PixelFormat::RGBA8>,  // 0
+    MortonCopy<false, PixelFormat::RGB8>,   // 1
+    MortonCopy<false, PixelFormat::RGB5A1>, // 2
+    MortonCopy<false, PixelFormat::RGB565>, // 3
+    MortonCopy<false, PixelFormat::RGBA4>,  // 4
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,                              // 5 - 13
+    MortonCopy<false, PixelFormat::D16>,  // 14
+    nullptr,                              // 15
+    MortonCopy<false, PixelFormat::D24>,  // 16
+    MortonCopy<false, PixelFormat::D24S8> // 17
+};
+
+// Allocate an uninitialized texture of appropriate size and format for the surface
+VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 width, u32 height)
+{
+    // First check if the texture can be recycled
+    auto recycled_tex = host_texture_recycler.find({format, width, height});
+    if (recycled_tex != host_texture_recycler.end()) {
+        VKTexture texture = std::move(recycled_tex->second);
+        host_texture_recycler.erase(recycled_tex);
+        return texture;
+    }
+
+    // Otherwise create a brand new texture
+    u32 levels = std::log2(std::max(width, height)) + 1;
+    VKTexture::Info texture_info = {
+        .width = width,
+        .height = height,
+        .format = format,
+        .type = vk::ImageType::e2D,
+        .view_type = vk::ImageViewType::e2D,
+        .mipmap_levels = levels
+    };
+
+    VKTexture texture;
+    texture.Create(texture_info);
+
+    return texture;
+}
+
+static bool BlitTextures(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint dst_tex,
+                         const Common::Rectangle<u32>& dst_rect, SurfaceType type,
+                         GLuint read_fb_handle, GLuint draw_fb_handle) {
+    OpenGLState prev_state = OpenGLState::GetCurState();
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    OpenGLState state;
+    state.draw.read_framebuffer = read_fb_handle;
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    u32 buffers = 0;
+
+    if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
+                               0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER,
GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_tex, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_tex, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + // TODO (wwylele): use GL_NEAREST for shadow map texture + // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but + // doing linear intepolation componentwise would cause incorrect value. However, for a + // well-programmed game this code path should be rarely executed for shadow map with + // inconsistent scale. + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, + dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, + buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); + + return true; +} + +static bool FillSurface(const Surface& surface, const u8* fill_data, + const Common::Rectangle& fill_rect, GLuint draw_fb_handle) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.scissor.enabled = true; + state.scissor.x = static_cast(fill_rect.left); + state.scissor.y = static_cast(fill_rect.bottom); + state.scissor.width = static_cast(fill_rect.GetWidth()); + state.scissor.height = static_cast(fill_rect.GetHeight()); + + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); + + surface->InvalidateAllWatcher(); + + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Common::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); + + std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, + color.w / 255.f}; + + state.color_mask.red_enabled = GL_TRUE; + state.color_mask.green_enabled = GL_TRUE; + state.color_mask.blue_enabled = GL_TRUE; + state.color_mask.alpha_enabled = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_COLOR, 0, &color_values[0]); + } else if (surface->type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + u32 value_32bit = 0; + GLfloat value_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&value_32bit, fill_data, 2); + value_float = value_32bit / 65535.0f; // 2^16 - 1 + } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&value_32bit, fill_data, 3); + value_float = value_32bit / 16777215.0f; // 2^24 - 1 + } + + state.depth.write_mask = GL_TRUE; + state.Apply(); + glClearBufferfv(GL_DEPTH, 0, &value_float); + } else if (surface->type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + surface->texture.handle, 0); + + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, sizeof(u32)); + + GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + GLint value_int = (value_32bit >> 24); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); + } + return true; +} + +CachedSurface::~CachedSurface() { + if (texture.handle) { + auto tag = is_custom ? 
HostTextureTag{GetFormatTuple(PixelFormat::RGBA8), + custom_tex_info.width, custom_tex_info.height} + : HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(), + GetScaledHeight()}; + + owner.host_texture_recycler.emplace(tag, std::move(texture)); + } +} + +bool CachedSurface::CanFill(const SurfaceParams& dest_surface, + SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +MICROPROFILE_DEFINE(Vulkan_CopySurface, "Vulkan", "CopySurface", MP_RGB(128, 192, 64)); +void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + MICROPROFILE_SCOPE(Vulkan_CopySurface); + + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + + ASSERT(src_surface != dst_surface); + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = + (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + + FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), + draw_framebuffer.handle); + return; + } + if (src_surface->CanSubRect(subrect_params)) { + BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), + dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), + src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); + return; + } + UNREACHABLE(); +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); +void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); + const bool need_swap = + GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); + + const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + if (gl_buffer.empty()) { + 
gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format)); + } + + // TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end = Memory::VRAM_VADDR_END; + + if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; + + if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + if (need_swap) { + // TODO(liushuyu): check if the byteswap here is 100% correct + // cannot fully test this + if (pixel_format == PixelFormat::RGBA8) { + for (std::size_t i = start_offset; i < load_end - addr; i += 4) { + gl_buffer[i] = texture_src_data[i + 3]; + gl_buffer[i + 1] = texture_src_data[i + 2]; + gl_buffer[i + 2] = texture_src_data[i + 1]; + gl_buffer[i + 3] = texture_src_data[i]; + } + } else if (pixel_format == PixelFormat::RGB8) { + for (std::size_t i = start_offset; i < load_end - addr; i += 3) { + gl_buffer[i] = texture_src_data[i + 2]; + gl_buffer[i + 1] = texture_src_data[i + 1]; + gl_buffer[i + 2] = texture_src_data[i]; + } + } + } else { + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + load_end - load_start); + } + } else { + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; + + const SurfaceInterval load_interval(load_start, load_end); + const auto rect = GetSubRect(FromInterval(load_interval)); + ASSERT(FromInterval(load_interval).GetInterval() == load_interval); + + for (unsigned y = rect.bottom; y < rect.top; ++y) { + for (unsigned x = rect.left; x < rect.right; ++x) { + auto vec4 = + Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const std::size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); + } + } + } else { + morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], + addr, load_start, load_end); + } + } +} + +MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); + if (dst_buffer == nullptr) + return; + + ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format)); + + // TODO: Should probably be done in ::Memory:: and check for other regions too + // same as loadglbuffer() + if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) + flush_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { + 
std::memcpy(&dst_buffer[offset], &fill_data[0], + std::min(fill_size, end_offset - offset)); + } + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + if (pixel_format == PixelFormat::RGBA8 && GLES) { + for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { + dst_buffer[i] = gl_buffer[i + 3]; + dst_buffer[i + 1] = gl_buffer[i + 2]; + dst_buffer[i + 2] = gl_buffer[i + 1]; + dst_buffer[i + 3] = gl_buffer[i]; + } + } else if (pixel_format == PixelFormat::RGB8 && GLES) { + for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { + dst_buffer[i] = gl_buffer[i + 2]; + dst_buffer[i + 1] = gl_buffer[i + 1]; + dst_buffer[i + 2] = gl_buffer[i]; + } + } else { + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], + flush_end - flush_start); + } + } else { + gl_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], + addr, flush_start, flush_end); + } +} + +bool CachedSurface::LoadCustomTexture(u64 tex_hash) { + auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); + const auto& image_interface = Core::System::GetInstance().GetImageInterface(); + + if (custom_tex_cache.IsTextureCached(tex_hash)) { + custom_tex_info = custom_tex_cache.LookupTexture(tex_hash); + return true; + } + + if (!custom_tex_cache.CustomTextureExists(tex_hash)) { + return false; + } + + const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash); + if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width, + custom_tex_info.height, path_info.path)) { + LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path); + return false; + } + + const std::bitset<32> width_bits(custom_tex_info.width); + const std::bitset<32> height_bits(custom_tex_info.height); + if (width_bits.count() != 1 || height_bits.count() != 1) { + LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path); + return false; + } + + LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path); + Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height); + custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width, + custom_tex_info.height); + return true; +} + +void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) { + // Make sure the texture size is a power of 2 + // If not, the surface is actually a framebuffer + std::bitset<32> width_bits(width); + std::bitset<32> height_bits(height); + if (width_bits.count() != 1 || height_bits.count() != 1) { + LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})", + tex_hash, width, height); + return; + } + + // Dump texture to RGBA8 and encode as PNG + const auto& image_interface = Core::System::GetInstance().GetImageInterface(); + auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); + std::string dump_path = + fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), + Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id); + if (!FileUtil::CreateFullPath(dump_path)) { + LOG_ERROR(Render, "Unable to create {}", dump_path); + return; + } + + dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format); + if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) { + custom_tex_cache.SetTextureDumped(tex_hash); + + LOG_INFO(Render_OpenGL, 
"Dumping texture to {}", dump_path); + std::vector decoded_texture; + decoded_texture.resize(width * height * 4); + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_texture = state.texture_units[0].texture_2d; + state.Apply(); + /* + GetTexImageOES is used even if not using OpenGL ES to work around a small issue that + happens if using custom textures with texture dumping at the same. + Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a + higher quality 256x256 texture. If the 256x256 texture is displayed first and the + 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture + will appear in the corner of the 256x256 texture. If texture dumping is enabled and + the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL + texture is still 256x256, Citra crashes because it thinks the texture is only 32x32. + GetTexImageOES conveniently only dumps the specified region, and works on both + desktop and ES. + */ + // if the backend isn't OpenGL ES, this won't be initialized yet + if (!owner.texture_downloader_es) + owner.texture_downloader_es = std::make_unique(false); + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, + height, width, &decoded_texture[0]); + state.texture_units[0].texture_2d = old_texture; + state.Apply(); + Common::FlipRGBA8Texture(decoded_texture, width, height); + if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) + LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); + } +} + +MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { + if (type == SurfaceType::Fill) + return; + + MICROPROFILE_SCOPE(OpenGL_TextureUL); + + ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format)); + + u64 tex_hash = 0; + + if (Settings::values.dump_textures || Settings::values.custom_textures) { + tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size()); + } + + if (Settings::values.custom_textures) { + is_custom = LoadCustomTexture(tex_hash); + } + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in + // surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + if (is_custom) { + unscaled_tex = owner.AllocateSurfaceTexture( + GetFormatTuple(PixelFormat::RGBA8), custom_tex_info.width, custom_tex_info.height); + } else { + unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight()); + } + target_tex = unscaled_tex.handle; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = target_tex; + cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + if (is_custom) { + if (res_scale == 1) { + texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8), + custom_tex_info.width, custom_tex_info.height); + cur_state.texture_units[0].texture_2d = texture.handle; + 
cur_state.Apply(); + } + // always going to be using rgba8 + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(custom_tex_info.width)); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, custom_tex_info.width, custom_tex_info.height, + GL_RGBA, GL_UNSIGNED_BYTE, custom_tex_info.tex.data()); + } else { + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + &gl_buffer[buffer_offset]); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + if (Settings::values.dump_textures && !is_custom) + DumpTexture(target_tex, tex_hash); + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + auto from_rect = + is_custom ? Common::Rectangle{0, custom_tex_info.height, custom_tex_info.width, 0} + : Common::Rectangle{0, rect.GetHeight(), rect.GetWidth(), 0}; + if (!owner.texture_filterer->Filter(unscaled_tex.handle, from_rect, texture.handle, + scaled_rect, type, read_fb_handle, draw_fb_handle)) { + BlitTextures(unscaled_tex.handle, from_rect, texture.handle, scaled_rect, type, + read_fb_handle, draw_fb_handle); + } + } + + InvalidateAllWatcher(); +} + +MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); +void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { + if (type == SurfaceType::Fill) { + return; + } + + MICROPROFILE_SCOPE(OpenGL_TextureDL); + + if (gl_buffer.empty()) { + gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format)); + } + + OpenGLState state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + + // Ensure no bad interactions with GL_PACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + std::size_t buffer_offset = + (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + Common::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; + OGLTexture unscaled_tex = + owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight()); + BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, + read_fb_handle, draw_fb_handle); + + state.texture_units[0].texture_2d = unscaled_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + if (GLES) { + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect.GetHeight(), rect.GetWidth(), + &gl_buffer[buffer_offset]); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); + } + } else { + state.ResetTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); + + if (type == SurfaceType::Color || type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        } else if (type == SurfaceType::Depth) {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        } else {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+        }
+        switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) {
+        case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
+            break;
+        case GL_FRAMEBUFFER_UNSUPPORTED:
+            LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
+            break;
+        }
+        glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
+                     static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
+                     tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+    }
+
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+}
+
+enum MatchFlags {
+    Invalid = 1,      // Flag that can be applied to other match types, invalid matches require
+                      // validation before they can be used
+    Exact = 1 << 1,   // Surfaces perfectly match
+    SubRect = 1 << 2, // Surface encompasses params
+    Copy = 1 << 3,    // Surface we can copy from
+    Expand = 1 << 4,  // Surface that can expand params
+    TexCopy = 1 << 5  // Surface that will match a display transfer "texture copy" parameters
+};
+
+static constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
+    return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+
+/// Get the best surface match (and its match type) for the given flags
+template <MatchFlags find_flags>
+static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
+                         ScaleMatch match_scale_type,
+                         std::optional<SurfaceInterval> validate_interval = std::nullopt) {
+    Surface match_surface = nullptr;
+    bool match_valid = false;
+    u32 match_scale = 0;
+    SurfaceInterval match_interval{};
+
+    for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
+        for (const auto& surface : pair.second) {
+            const bool res_scale_matched = match_scale_type == ScaleMatch::Exact
+                                               ? (params.res_scale == surface->res_scale)
+                                               : (params.res_scale <= surface->res_scale);
+            // validity will be checked in GetCopyableInterval
+            bool is_valid =
+                find_flags & MatchFlags::Copy
+                    ? true
+                    : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
+
+            if (!(find_flags & MatchFlags::Invalid) && !is_valid)
+                continue;
+
+            auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
+                if (!(find_flags & check_type))
+                    return;
+
+                bool matched;
+                SurfaceInterval surface_interval;
+                std::tie(matched, surface_interval) = match_fn();
+                if (!matched)
+                    return;
+
+                if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
+                    surface->type != SurfaceType::Fill)
+                    return;
+
+                // Found a match, update only if this is better than the previous one
+                auto UpdateMatch = [&] {
+                    match_surface = surface;
+                    match_valid = is_valid;
+                    match_scale = surface->res_scale;
+                    match_interval = surface_interval;
+                };
+
+                if (surface->res_scale > match_scale) {
+                    UpdateMatch();
+                    return;
+                } else if (surface->res_scale < match_scale) {
+                    return;
+                }
+
+                if (is_valid && !match_valid) {
+                    UpdateMatch();
+                    return;
+                } else if (is_valid != match_valid) {
+                    return;
+                }
+
+                if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
+                    UpdateMatch();
+                }
+            };
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
+                return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
+                return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
+                ASSERT(validate_interval);
+                auto copy_interval =
+                    params.FromInterval(*validate_interval).GetCopyableInterval(surface);
+                bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
+                               surface->CanCopy(params, copy_interval);
+                return std::make_pair(matched, copy_interval);
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
+                return std::make_pair(surface->CanExpand(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
+                return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
+            });
+        }
+    }
+    return match_surface;
+}
+
+RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+    resolution_scale_factor = VideoCore::GetResolutionScaleFactor();
+    texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name,
+                                                         resolution_scale_factor);
+    format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>();
+    if (GLES)
+        texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
+
+    read_framebuffer.Create();
+    draw_framebuffer.Create();
+}
+
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+#ifndef ANDROID
+    // This is for switching renderers, which is unsupported on Android, and costly on shutdown
+    ClearAll(false);
+#endif
+}
+
+MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
+bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
+                                         const Common::Rectangle<u32>& src_rect,
+                                         const Surface& dst_surface,
+                                         const Common::Rectangle<u32>& dst_rect) {
+    MICROPROFILE_SCOPE(OpenGL_BlitSurface);
+
+    if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format))
+        return false;
+
+    dst_surface->InvalidateAllWatcher();
+
+    return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
+                        dst_rect, src_surface->type, read_framebuffer.handle,
+                        draw_framebuffer.handle);
+}
+
+Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
+                                          bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return nullptr;
+    }
+    // Use GetSurfaceSubRect instead
+    ASSERT(params.width == params.stride);
+
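+    // Note on the lookup below: find_flags is a bitmask built with the constexpr
+    // operator| defined above, so e.g. MatchFlags::Exact | MatchFlags::Invalid also
+    // accepts surfaces whose data is stale; ValidateSurface refreshes them before use.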
+    ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
+
+    // Check for an exact match in existing surfaces
+    Surface surface =
+        FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
+
+    if (surface == nullptr) {
+        u16 target_res_scale = params.res_scale;
+        if (match_res_scale != ScaleMatch::Exact) {
+            // This surface may have a subrect of another surface with a higher res_scale, find
+            // it to adjust our params
+            SurfaceParams find_params = params;
+            Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
+                surface_cache, find_params, match_res_scale);
+            if (expandable != nullptr && expandable->res_scale > target_res_scale) {
+                target_res_scale = expandable->res_scale;
+            }
+            // Keep res_scale when reinterpreting d24s8 -> rgba8
+            if (params.pixel_format == PixelFormat::RGBA8) {
+                find_params.pixel_format = PixelFormat::D24S8;
+                expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
+                    surface_cache, find_params, match_res_scale);
+                if (expandable != nullptr && expandable->res_scale > target_res_scale) {
+                    target_res_scale = expandable->res_scale;
+                }
+            }
+        }
+        SurfaceParams new_params = params;
+        new_params.res_scale = target_res_scale;
+        surface = CreateSurface(new_params);
+        RegisterSurface(surface);
+    }
+
+    if (load_if_create) {
+        ValidateSurface(surface, params.addr, params.size);
+    }
+
+    return surface;
+}
+
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
+                                                           ScaleMatch match_res_scale,
+                                                           bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return std::make_tuple(nullptr, Common::Rectangle<u32>{});
+    }
+
+    // Attempt to find encompassing surface
+    Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                           match_res_scale);
+
+    // Check if FindMatch failed because of res scaling. If that's the case, create a new surface
+    // with the dimensions of the lower res_scale surface to suggest it should not be used again
+    if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
+        surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                       ScaleMatch::Ignore);
+        if (surface != nullptr) {
+            SurfaceParams new_params = *surface;
+            new_params.res_scale = params.res_scale;
+
+            surface = CreateSurface(new_params);
+            RegisterSurface(surface);
+        }
+    }
+
+    SurfaceParams aligned_params = params;
+    if (params.is_tiled) {
+        aligned_params.height = Common::AlignUp(params.height, 8);
+        aligned_params.width = Common::AlignUp(params.width, 8);
+        aligned_params.stride = Common::AlignUp(params.stride, 8);
+        aligned_params.UpdateParams();
+    }
+
+    // Check for a surface we can expand before creating a new one
+    if (surface == nullptr) {
+        surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
+                                                                      match_res_scale);
+        if (surface != nullptr) {
+            aligned_params.width = aligned_params.stride;
+            aligned_params.UpdateParams();
+
+            SurfaceParams new_params = *surface;
+            new_params.addr = std::min(aligned_params.addr, surface->addr);
+            new_params.end = std::max(aligned_params.end, surface->end);
+            new_params.size = new_params.end - new_params.addr;
+            new_params.height =
+                new_params.size / aligned_params.BytesInPixels(aligned_params.stride);
+            ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
+
+            Surface new_surface = CreateSurface(new_params);
+            DuplicateSurface(surface, new_surface);
+
+            // Delete the expanded surface, this can't be done safely yet
+            // because it may still be in use
+            surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted
+            remove_surfaces.emplace(surface);
+
+            surface = new_surface;
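+            // The expanded copy takes over from this point; the original surface is
+            // parked in remove_surfaces and unregistered later by InvalidateRegion,
+            // once it can no longer be in use.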
RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); + } + + return std::make_tuple(surface, surface->GetScaledSubRect(params)); +} + +Surface RasterizerCacheOpenGL::GetTextureSurface( + const Pica::TexturingRegs::FullTextureConfig& config) { + Pica::Texture::TextureInfo info = + Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + return GetTextureSurface(info, config.config.lod.max_level); +} + +Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInfo& info, + u32 max_level) { + if (info.physical_address == 0) { + return nullptr; + } + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor; + params.UpdateParams(); + + u32 min_width = info.width >> max_level; + u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, + min_height); + return nullptr; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(Render_OpenGL, + "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return nullptr; + } + + auto surface = GetSurface(params, ScaleMatch::Ignore, true); + if (!surface) + return nullptr; + + // Update mipmap if necessary + if (max_level != 0) { + if (max_level >= 8) { + // since PICA only supports texture size between 8 and 1024, there are at most eight + // possible mipmap levels including the base. 
+ LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + return nullptr; + } + OpenGLState prev_state = OpenGLState::GetCurState(); + OpenGLState state; + SCOPE_EXIT({ prev_state.Apply(); }); + auto format_tuple = GetFormatTuple(params.pixel_format); + + // Allocate more mipmap level if necessary + if (surface->max_level < max_level) { + state.texture_units[0].texture_2d = surface->texture.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); + u32 width; + u32 height; + if (surface->is_custom) { + width = surface->custom_tex_info.width; + height = surface->custom_tex_info.height; + } else { + width = surface->GetScaledWidth(); + height = surface->GetScaledHeight(); + } + // If we are using ARB_texture_storage then we've already allocated all of the mipmap + // levels + if (!GL_ARB_texture_storage) { + for (u32 level = surface->max_level + 1; level <= max_level; ++level) { + glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level, + height >> level, 0, format_tuple.format, format_tuple.type, + nullptr); + } + } + if (surface->is_custom || !texture_filterer->IsNull()) { + // TODO: proper mipmap support for custom textures + glGenerateMipmap(GL_TEXTURE_2D); + } + surface->max_level = max_level; + } + + // Blit mipmaps that have been invalidated + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.ResetTexture(surface->texture.handle); + SurfaceParams surface_params = *surface; + for (u32 level = 1; level <= max_level; ++level) { + // In PICA all mipmap levels are stored next to each other + surface_params.addr += + surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; + surface_params.width /= 2; + surface_params.height /= 2; + surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.UpdateParams(); + auto& watcher = surface->level_watchers[level - 1]; + if (!watcher || !watcher->Get()) { + auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); + if (level_surface) { + watcher = level_surface->CreateWatcher(); + } else { + watcher = nullptr; + } + } + + if (watcher && !watcher->IsValid()) { + auto level_surface = watcher->Get(); + if (!level_surface->invalid_regions.empty()) { + ValidateSurface(level_surface, level_surface->addr, level_surface->size); + } + state.ResetTexture(level_surface->texture.handle); + state.Apply(); + if (!surface->is_custom && texture_filterer->IsNull()) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + level_surface->texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + auto src_rect = level_surface->GetScaledRect(); + auto dst_rect = surface_params.GetScaledRect(); + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, + dst_rect.left, dst_rect.bottom, dst_rect.right, dst_rect.top, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + } + watcher->Validate(); + } + } + } + + return surface; +} + +const CachedTextureCube& RasterizerCacheOpenGL::GetTextureCube(const TextureCubeConfig& config) { + auto& cube = texture_cube_cache[config]; + + struct Face { + 
Face(std::shared_ptr& watcher, PAddr address, GLenum gl_face) + : watcher(watcher), address(address), gl_face(gl_face) {} + std::shared_ptr& watcher; + PAddr address; + GLenum gl_face; + }; + + const std::array faces{{ + {cube.px, config.px, GL_TEXTURE_CUBE_MAP_POSITIVE_X}, + {cube.nx, config.nx, GL_TEXTURE_CUBE_MAP_NEGATIVE_X}, + {cube.py, config.py, GL_TEXTURE_CUBE_MAP_POSITIVE_Y}, + {cube.ny, config.ny, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y}, + {cube.pz, config.pz, GL_TEXTURE_CUBE_MAP_POSITIVE_Z}, + {cube.nz, config.nz, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z}, + }}; + + for (const Face& face : faces) { + if (!face.watcher || !face.watcher->Get()) { + Pica::Texture::TextureInfo info; + info.physical_address = face.address; + info.height = info.width = config.width; + info.format = config.format; + info.SetDefaultStride(); + auto surface = GetTextureSurface(info); + if (surface) { + face.watcher = surface->CreateWatcher(); + } else { + // Can occur when texture address is invalid. We mark the watcher with nullptr + // in this case and the content of the face wouldn't get updated. These are + // usually leftover setup in the texture unit and games are not supposed to draw + // using them. + face.watcher = nullptr; + } + } + } + + if (cube.texture.handle == 0) { + for (const Face& face : faces) { + if (face.watcher) { + auto surface = face.watcher->Get(); + cube.res_scale = std::max(cube.res_scale, surface->res_scale); + } + } + + cube.texture.Create(); + AllocateTextureCube( + cube.texture.handle, + GetFormatTuple(CachedSurface::PixelFormatFromTextureFormat(config.format)), + cube.res_scale * config.width); + } + + u32 scaled_size = cube.res_scale * config.width; + + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.ResetTexture(cube.texture.handle); + + for (const Face& face : faces) { + if (face.watcher && !face.watcher->IsValid()) { + auto surface = face.watcher->Get(); + if (!surface->invalid_regions.empty()) { + ValidateSurface(surface, surface->addr, surface->size); + } + state.ResetTexture(surface->texture.handle); + state.Apply(); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, face.gl_face, + cube.texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + + auto src_rect = surface->GetScaledRect(); + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, 0, 0, + scaled_size, scaled_size, GL_COLOR_BUFFER_BIT, GL_LINEAR); + face.watcher->Validate(); + } + } + + return cube; +} + +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, const Common::Rectangle& viewport_rect) { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // update resolution_scale_factor and reset cache if changed + if ((resolution_scale_factor != VideoCore::GetResolutionScaleFactor()) | + (VideoCore::g_texture_filter_update_requested.exchange(false) && + texture_filterer->Reset(Settings::values.texture_filter_name, resolution_scale_factor))) { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + 
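+        // A resolution scale change invalidates every cached texture: write dirty
+        // data back to guest memory first, then drop all surfaces and cube maps so
+        // they are recreated at the new scale.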
FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + texture_cube_cache.clear(); + } + + Common::Rectangle viewport_clamped{ + static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + Common::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + Common::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + Common::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + color_surface->InvalidateAllWatcher(); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + depth_surface->InvalidateAllWatcher(); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); +} + +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(*this); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + 
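+    // Fill surfaces carry only the 2-4 byte fill pattern instead of a texture, so
+    // they are valid at any scale; the maximal res_scale makes them win every scale
+    // comparison in FindMatch.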
+    new_surface->res_scale = std::numeric_limits<u16>::max();
+
+    std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4);
+    if (config.fill_32bit) {
+        new_surface->fill_size = 4;
+    } else if (config.fill_24bit) {
+        new_surface->fill_size = 3;
+    } else {
+        new_surface->fill_size = 2;
+    }
+
+    RegisterSurface(new_surface);
+    return new_surface;
+}
+
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
+    Common::Rectangle<u32> rect{};
+
+    Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
+        surface_cache, params, ScaleMatch::Ignore);
+
+    if (match_surface != nullptr) {
+        ValidateSurface(match_surface, params.addr, params.size);
+
+        SurfaceParams match_subrect;
+        if (params.width != params.stride) {
+            const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
+            match_subrect = params;
+            match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size;
+            match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size;
+            match_subrect.height *= tiled_size;
+        } else {
+            match_subrect = match_surface->FromInterval(params.GetInterval());
+            ASSERT(match_subrect.GetInterval() == params.GetInterval());
+        }
+
+        rect = match_surface->GetScaledSubRect(match_subrect);
+    }
+
+    return std::make_tuple(match_surface, rect);
+}
+
+void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
+                                             const Surface& dest_surface) {
+    ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
+
+    BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
+                 dest_surface->GetScaledSubRect(*src_surface));
+
+    dest_surface->invalid_regions -= src_surface->GetInterval();
+    dest_surface->invalid_regions += src_surface->invalid_regions;
+
+    SurfaceRegions regions;
+    for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
+        if (pair.second == src_surface) {
+            regions += pair.first;
+        }
+    }
+    for (const auto& interval : regions) {
+        dirty_regions.set({interval, dest_surface});
+    }
+}
+
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) {
+    if (size == 0)
+        return;
+
+    const SurfaceInterval validate_interval(addr, addr + size);
+
+    if (surface->type == SurfaceType::Fill) {
+        // Sanity check, fill surfaces will always be valid when used
+        ASSERT(surface->IsRegionValid(validate_interval));
+        return;
+    }
+
+    auto validate_regions = surface->invalid_regions & validate_interval;
+    auto notify_validated = [&](SurfaceInterval interval) {
+        surface->invalid_regions.erase(interval);
+        validate_regions.erase(interval);
+    };
+
+    while (true) {
+        const auto it = validate_regions.begin();
+        if (it == validate_regions.end())
+            break;
+
+        const auto interval = *it & validate_interval;
+        // Look for a valid surface to copy from
+        SurfaceParams params = surface->FromInterval(interval);
+
+        Surface copy_surface =
+            FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
+        if (copy_surface != nullptr) {
+            SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
+            CopySurface(copy_surface, surface, copy_interval);
+            notify_validated(copy_interval);
+            continue;
+        }
+
+        // Try to find a surface in the cache with a different format that can
+        // be reinterpreted to the requested format.
+ if (ValidateByReinterpretation(surface, params, interval)) { + notify_validated(interval); + continue; + } + // Could not find a matching reinterpreter, check if we need to implement a + // reinterpreter + if (NoUnimplementedReinterpretations(surface, params, interval) && + !IntervalHasInvalidPixelFormat(params, interval)) { + // No surfaces were found in the cache that had a matching bit-width. + // If the region was created entirely on the GPU, + // assume it was a developer mistake and skip flushing. + if (boost::icl::contains(dirty_regions, interval)) { + LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " + "invalid. Skipping validation"); + validate_regions.erase(interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadGLBuffer(params.addr, params.end); + surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); + notify_validated(params.GetInterval()); + } +} + +bool RasterizerCacheOpenGL::NoUnimplementedReinterpretations(const Surface& surface, + SurfaceParams& params, + const SurfaceInterval& interval) { + static constexpr std::array all_formats{ + PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, + PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, + PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, + PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, + PixelFormat::D24S8, + }; + bool implemented = true; + for (PixelFormat format : all_formats) { + if (SurfaceParams::GetFormatBpp(format) == surface->GetFormatBpp()) { + params.pixel_format = format; + // This could potentially be expensive, + // although experimentally it hasn't been too bad + Surface test_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (test_surface != nullptr) { + LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", + SurfaceParams::PixelFormatAsString(format), + SurfaceParams::PixelFormatAsString(surface->pixel_format)); + implemented = false; + } + } + } + return implemented; +} + +bool RasterizerCacheOpenGL::IntervalHasInvalidPixelFormat(SurfaceParams& params, + const SurfaceInterval& interval) { + params.pixel_format = PixelFormat::Invalid; + for (const auto& set : RangeFromInterval(surface_cache, interval)) + for (const auto& surface : set.second) + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_WARNING(Render_OpenGL, "Surface found with invalid pixel format"); + return true; + } + return false; +} + +bool RasterizerCacheOpenGL::ValidateByReinterpretation(const Surface& surface, + SurfaceParams& params, + const SurfaceInterval& interval) { + auto [cvt_begin, cvt_end] = + format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format); + for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) { + PixelFormat format = reinterpreter->first.src_format; + params.pixel_format = format; + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + + if (reinterpret_surface != nullptr) { + SurfaceInterval reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + SurfaceParams reinterpret_params = surface->FromInterval(reinterpret_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); + auto dest_rect = surface->GetScaledSubRect(reinterpret_params); + + if (!texture_filterer->IsNull() && 
reinterpret_surface->res_scale == 1 && + surface->res_scale == resolution_scale_factor) { + // The destination surface is either a framebuffer, or a filtered texture. + // Create an intermediate surface to convert to before blitting to the + // destination. + Common::Rectangle tmp_rect{0, dest_rect.GetHeight() / resolution_scale_factor, + dest_rect.GetWidth() / resolution_scale_factor, 0}; + OGLTexture tmp_tex = AllocateSurfaceTexture( + GetFormatTuple(reinterpreter->first.dst_format), tmp_rect.right, tmp_rect.top); + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, tmp_tex.handle, + tmp_rect, draw_framebuffer.handle); + SurfaceParams::SurfaceType type = + SurfaceParams::GetFormatType(reinterpreter->first.dst_format); + + if (!texture_filterer->Filter(tmp_tex.handle, tmp_rect, surface->texture.handle, + dest_rect, type, read_framebuffer.handle, + draw_framebuffer.handle)) { + BlitTextures(tmp_tex.handle, tmp_rect, surface->texture.handle, dest_rect, type, + read_framebuffer.handle, draw_framebuffer.handle); + } + } else { + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, surface->texture.handle, + dest_rect, draw_framebuffer.handle); + } + return true; + } + } + return false; +} + +void RasterizerCacheOpenGL::ClearAll(bool flush) { + const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); + // Force flush all surfaces from the cache + if (flush) { + FlushRegion(0x0, 0xFFFFFFFF); + } + // Unmark all of the marked pages + for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { + const auto interval = pair.first & flush_interval; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } + + // Remove the whole cache without really looking at it. + cached_pages -= flush_interval; + dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); + surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); + remove_surfaces.clear(); +} + +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? 
+                                  pair.first : pair.first & flush_interval;
+        auto& surface = pair.second;
+
+        if (flush_surface != nullptr && surface != flush_surface)
+            continue;
+
+        // Sanity check, this surface is the last one that marked this region dirty
+        ASSERT(surface->IsRegionValid(interval));
+
+        if (surface->type != SurfaceType::Fill) {
+            SurfaceParams params = surface->FromInterval(interval);
+            surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
+                                       draw_framebuffer.handle);
+        }
+        surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
+        flushed_intervals += interval;
+    }
+    // Reset dirty regions
+    dirty_regions -= flushed_intervals;
+}
+
+void RasterizerCacheOpenGL::FlushAll() {
+    FlushRegion(0, 0xFFFFFFFF);
+}
+
+void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) {
+    std::lock_guard lock{mutex};
+
+    if (size == 0)
+        return;
+
+    const SurfaceInterval invalid_interval(addr, addr + size);
+
+    if (region_owner != nullptr) {
+        ASSERT(region_owner->type != SurfaceType::Texture);
+        ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
+        // Surfaces can't have a gap
+        ASSERT(region_owner->width == region_owner->stride);
+        region_owner->invalid_regions.erase(invalid_interval);
+    }
+
+    for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
+        for (const auto& cached_surface : pair.second) {
+            if (cached_surface == region_owner)
+                continue;
+
+            // If the CPU is invalidating this region we want to remove it
+            // to (likely) mark the memory pages as uncached
+            if (region_owner == nullptr && size <= 8) {
+                FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
+                remove_surfaces.emplace(cached_surface);
+                continue;
+            }
+
+            const auto interval = cached_surface->GetInterval() & invalid_interval;
+            cached_surface->invalid_regions.insert(interval);
+            cached_surface->InvalidateAllWatcher();
+
+            // If the surface has no salvageable data it should be removed from the cache to avoid
+            // clogging the data structure
+            if (cached_surface->IsSurfaceFullyInvalid()) {
+                remove_surfaces.emplace(cached_surface);
+            }
+        }
+    }
+
+    if (region_owner != nullptr)
+        dirty_regions.set({invalid_interval, region_owner});
+    else
+        dirty_regions.erase(invalid_interval);
+
+    for (const auto& remove_surface : remove_surfaces) {
+        if (remove_surface == region_owner) {
+            Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(
+                surface_cache, *region_owner, ScaleMatch::Ignore);
+            ASSERT(expanded_surface);
+
+            if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) {
+                DuplicateSurface(region_owner, expanded_surface);
+            } else {
+                continue;
+            }
+        }
+        UnregisterSurface(remove_surface);
+    }
+
+    remove_surfaces.clear();
+}
+
+Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
+    Surface surface = std::make_shared<CachedSurface>(*this);
+    static_cast<SurfaceParams&>(*surface) = params;
+
+    surface->invalid_regions.insert(surface->GetInterval());
+
+    surface->texture =
+        AllocateSurfaceTexture(GetFormatTuple(surface->pixel_format), surface->GetScaledWidth(),
+                               surface->GetScaledHeight());
+
+    return surface;
+}
+
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    std::lock_guard lock{mutex};
+
+    if (surface->registered) {
+        return;
+    }
+    surface->registered = true;
+    surface_cache.add({surface->GetInterval(), SurfaceSet{surface}});
+    UpdatePagesCachedCount(surface->addr, surface->size, 1);
+}
+
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    std::lock_guard
lock{mutex}; + + if (!surface->registered) { + return; + } + surface->registered = false; + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + true); + else if (delta < 0 && count == -delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h new file mode 100644 index 000000000..de44d1f29 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h @@ -0,0 +1,371 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
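+//
+// Like its OpenGL counterpart, this cache tracks surfaces over guest physical
+// address intervals (boost::icl interval maps and sets) and validates them
+// lazily from 3DS memory, with VKTexture taking the place of OGLTexture as
+// the backing storage.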
+
+#pragma once
+
+#include <array>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <set>
+#include <tuple>
+#include <unordered_map>
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
+#endif
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#include <boost/functional/hash.hpp>
+#include <vulkan/vulkan.hpp>
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "core/custom_tex_cache.h"
+#include "video_core/renderer_vulkan/vk_surface_params.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+#include "video_core/texture/texture_decode.h"
+
+namespace Vulkan {
+
+class RasterizerCacheVulkan;
+class TextureFilterer;
+class FormatReinterpreterVulkan;
+
+const vk::Format& GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
+
+struct HostTextureTag {
+    vk::Format format;
+    u32 width;
+    u32 height;
+    bool operator==(const HostTextureTag& rhs) const noexcept {
+        return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height);
+    }
+};
+
+struct TextureCubeConfig {
+    PAddr px;
+    PAddr nx;
+    PAddr py;
+    PAddr ny;
+    PAddr pz;
+    PAddr nz;
+    u32 width;
+    Pica::TexturingRegs::TextureFormat format;
+
+    bool operator==(const TextureCubeConfig& rhs) const {
+        return std::tie(px, nx, py, ny, pz, nz, width, format) ==
+               std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format);
+    }
+
+    bool operator!=(const TextureCubeConfig& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::HostTextureTag> {
+    std::size_t operator()(const Vulkan::HostTextureTag& tag) const noexcept {
+        std::size_t hash = 0;
+        boost::hash_combine(hash, tag.format);
+        boost::hash_combine(hash, tag.width);
+        boost::hash_combine(hash, tag.height);
+        return hash;
+    }
+};
+
+template <>
+struct hash<Vulkan::TextureCubeConfig> {
+    std::size_t operator()(const Vulkan::TextureCubeConfig& config) const noexcept {
+        std::size_t hash = 0;
+        boost::hash_combine(hash, config.px);
+        boost::hash_combine(hash, config.nx);
+        boost::hash_combine(hash, config.py);
+        boost::hash_combine(hash, config.ny);
+        boost::hash_combine(hash, config.pz);
+        boost::hash_combine(hash, config.nz);
+        boost::hash_combine(hash, config.width);
+        boost::hash_combine(hash, static_cast<u32>(config.format));
+        return hash;
+    }
+};
+} // namespace std
+
+namespace Vulkan {
+
+using SurfaceSet = std::set<Surface>;
+
+using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterval>;
+using SurfaceMap =
+    boost::icl::interval_map<PAddr, Surface, boost::icl::partial_absorber, std::less,
+                             boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
+using SurfaceCache =
+    boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber, std::less,
+                             boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
+
+static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
+                  std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
+              "incorrect interval types");
+
+using SurfaceRect_Tuple = std::tuple<Surface, Common::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
+
+using PageMap = boost::icl::interval_map<u32, int>;
+
+enum class ScaleMatch {
+    Exact,   // only accept same res scale
+    Upscale, // only allow higher scale than params
+    Ignore   // accept every scaled res
+};
+
+/**
+ * A watcher that notifies whether a cached surface has been changed. This is useful for caching
+ * surface collection objects, including texture cube and mipmap.
+ */
+struct SurfaceWatcher {
+public:
+    explicit SurfaceWatcher(std::weak_ptr<CachedSurface>&& surface) : surface(std::move(surface)) {}
+
+    /**
+     * Checks whether the surface has been changed.
+     * @return false if the surface content has been changed since last Validate() call or has been
+     * destroyed; otherwise true
+     */
+    bool IsValid() const {
+        return !surface.expired() && valid;
+    }
+
+    /// Marks that the content of the referencing surface has been updated to the watcher user.
+    void Validate() {
+        ASSERT(!surface.expired());
+        valid = true;
+    }
+
+    /// Gets the referencing surface. Returns null if the surface has been destroyed
+    Surface Get() const {
+        return surface.lock();
+    }
+
+private:
+    friend struct CachedSurface;
+    std::weak_ptr<CachedSurface> surface;
+    bool valid = false;
+};
+
+class RasterizerCacheVulkan;
+
+struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
+    CachedSurface(RasterizerCacheVulkan& owner) : owner{owner} {}
+    ~CachedSurface();
+
+    bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
+    bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
+
+    bool IsRegionValid(SurfaceInterval interval) const {
+        return (invalid_regions.find(interval) == invalid_regions.end());
+    }
+
+    bool IsSurfaceFullyInvalid() const {
+        auto interval = GetInterval();
+        return *invalid_regions.equal_range(interval).first == interval;
+    }
+
+    bool registered = false;
+    SurfaceRegions invalid_regions;
+
+    u32 fill_size = 0; /// Number of bytes to read from fill_data
+    std::array<u8, 4> fill_data;
+
+    VKTexture texture;
+
+    /// max mipmap level that has been attached to the texture
+    u32 max_level = 0;
+    /// level_watchers[i] watches the (i+1)-th level mipmap source surface
+    std::array<std::shared_ptr<SurfaceWatcher>, 7> level_watchers;
+
+    bool is_custom = false;
+    Core::CustomTexInfo custom_tex_info;
+
+    static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
+        return format == PixelFormat::Invalid
+                   ? 0
+                   : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture)
+                         ? 4
+                         : SurfaceParams::GetFormatBpp(format) / 8;
+    }
+
+    std::vector<u8> vk_buffer;
+
+    // Read/Write data in 3DS memory to/from vk_buffer
+    void LoadGLBuffer(PAddr load_start, PAddr load_end);
+    void FlushGLBuffer(PAddr flush_start, PAddr flush_end);
+
+    // Custom texture loading and dumping
+    bool LoadCustomTexture(u64 tex_hash);
+    void DumpTexture(VKTexture& target_tex, u64 tex_hash);
+
+    // Upload/Download data in vk_buffer in/to this surface's texture
+    void UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_handle, GLuint draw_fb_handle);
+    void DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
+                           GLuint draw_fb_handle);
+
+    std::shared_ptr<SurfaceWatcher> CreateWatcher() {
+        auto watcher = std::make_shared<SurfaceWatcher>(weak_from_this());
+        watchers.push_front(watcher);
+        return watcher;
+    }
+
+    void InvalidateAllWatcher() {
+        for (const auto& watcher : watchers) {
+            if (auto locked = watcher.lock()) {
+                locked->valid = false;
+            }
+        }
+    }
+
+    void UnlinkAllWatcher() {
+        for (const auto& watcher : watchers) {
+            if (auto locked = watcher.lock()) {
+                locked->valid = false;
+                locked->surface.reset();
+            }
+        }
+        watchers.clear();
+    }
+
+private:
+    RasterizerCacheVulkan& owner;
+    std::list<std::weak_ptr<SurfaceWatcher>> watchers;
+};
+
+struct CachedTextureCube {
+    VKTexture texture;
+    u16 res_scale = 1;
+    std::shared_ptr<SurfaceWatcher> px;
+    std::shared_ptr<SurfaceWatcher> nx;
+    std::shared_ptr<SurfaceWatcher> py;
+    std::shared_ptr<SurfaceWatcher> ny;
+    std::shared_ptr<SurfaceWatcher> pz;
+    std::shared_ptr<SurfaceWatcher> nz;
+};
+
+class TextureDownloader;
+
+class RasterizerCacheVulkan : NonCopyable {
+public:
+    RasterizerCacheVulkan();
+    ~RasterizerCacheVulkan();
+
+    /// Blit one surface's texture to another
+    bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle<u32>& src_rect,
+                      const Surface& dst_surface, const Common::Rectangle<u32>& dst_rect);
+
+    /// Copy one surface's region to another
+    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                     SurfaceInterval copy_interval);
+
+    /// Load a texture from 3DS memory to the host GPU and cache it (if not already cached)
+    Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
+                       bool load_if_create);
+
+    /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
+    /// 3DS memory to the host GPU and caches it (if not already cached)
+    SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
+                                        bool load_if_create);
+
+    /// Get a surface based on the texture configuration
+    Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
+    Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0);
+
+    /// Get a texture cube based on the texture configuration
+    const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config);
+
+    /// Get the color and depth surfaces based on the framebuffer configuration
+    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
+                                                    const Common::Rectangle<s32>& viewport_rect);
+
+    /// Get a surface that matches the fill config
+    Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config);
+
+    /// Get a surface that matches a "texture copy" display transfer config
+    SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
+
+    /// Write any cached resources overlapping the region back to memory (if dirty)
+    void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr);
+
+    /// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
+    void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner);
+
+    /// Flush all cached resources tracked by this cache manager
+    void FlushAll();
+
+    /// Clear all cached resources tracked by this cache manager
+    void ClearAll(bool flush);
+
+    // Textures from destroyed surfaces are stored here to be recycled, reducing allocation
+    // overhead in the driver. This must be placed above surface_cache to ensure all cached
+    // surfaces are destroyed before the recycler is.
+    std::unordered_multimap<HostTextureTag, VKTexture> host_texture_recycler;
+
+private:
+    void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
+
+    /// Update surface's texture for given region when necessary
+    void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
+
+    // Returns false if there is a surface in the cache at the interval with the same bit-width
+    // but no reinterpreter implemented for it
+    bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
+                                          Vulkan::SurfaceParams& params,
+                                          const Vulkan::SurfaceInterval& interval);
+
+    // Return true if a surface with an invalid pixel format exists at the interval
+    bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
+
+    // Attempt to find a reinterpretable surface in the cache and use it to copy for validation
+    bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
+                                    const SurfaceInterval& interval);
+
+    /// Create a new surface
+    Surface CreateSurface(const SurfaceParams& params);
+
+    /// Register surface into the cache
+    void RegisterSurface(const Surface& surface);
+
+    /// Remove surface from the cache
+    void UnregisterSurface(const Surface& surface);
+
+    /// Increase/decrease the number of surface in pages touching the specified region
+    void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
+
+    SurfaceCache surface_cache;
+    PageMap cached_pages;
+    SurfaceMap dirty_regions;
+    SurfaceSet remove_surfaces;
+
+    OGLFramebuffer read_framebuffer;
+    OGLFramebuffer draw_framebuffer;
+
+    u16 resolution_scale_factor;
+
+    std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
+
+    std::recursive_mutex mutex;
+
+public:
+    VKTexture AllocateSurfaceTexture(vk::Format format, u32 width, u32 height);
+
+    std::unique_ptr<TextureFilterer> texture_filterer;
+    std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
+    std::unique_ptr<TextureDownloader> texture_downloader_es;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.cpp b/src/video_core/renderer_vulkan/vk_resource_cache.cpp
new file mode 100644
index 000000000..07b8035f1
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_cache.cpp
@@ -0,0 +1,164 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
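+//
+// Usage sketch (illustrative, not part of this patch): callers fetch shared,
+// recycled objects from the global cache rather than creating their own:
+//   vk::RenderPass pass = g_object_cache->GetRenderPass(
+//       vk::Format::eR8G8B8A8Unorm, vk::Format::eD24UnormS8Uint, 1,
+//       vk::AttachmentLoadOp::eLoad);
+//   vk::Sampler sampler = g_object_cache->GetSampler(sampler_info);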
+
+#include "video_core/renderer_vulkan/vk_resource_cache.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include <array>
+#include <tuple>
+
+namespace Vulkan {
+
+VKResourceCache::~VKResourceCache()
+{
+    for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) {
+        g_vk_instace->GetDevice().destroyDescriptorSetLayout(descriptor_layouts[i]);
+    }
+}
+
+bool VKResourceCache::Initialize()
+{
+    // Define the descriptor sets we will be using
+    std::array<vk::DescriptorSetLayoutBinding, 2> ubo_set = {{
+        { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex |
+          vk::ShaderStageFlagBits::eGeometry | vk::ShaderStageFlagBits::eFragment }, // shader_data
+        { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex } // pica_uniforms
+    }};
+
+    std::array<vk::DescriptorSetLayoutBinding, 4> texture_set = {{
+        { 0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex0
+        { 1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex1
+        { 2, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex2
+        { 3, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex_cube
+    }};
+
+    std::array<vk::DescriptorSetLayoutBinding, 3> lut_set = {{
+        { 0, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_lf
+        { 1, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_rg
+        { 2, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment } // texture_buffer_lut_rgba
+    }};
+
+    // Create and store descriptor set layouts
+    std::array<vk::DescriptorSetLayoutCreateInfo, DESCRIPTOR_SET_LAYOUT_COUNT> create_infos = {{
+        { vk::DescriptorSetLayoutCreateFlags(), ubo_set },
+        { vk::DescriptorSetLayoutCreateFlags(), texture_set },
+        { vk::DescriptorSetLayoutCreateFlags(), lut_set }
+    }};
+
+    for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) {
+        descriptor_layouts[i] = g_vk_instace->GetDevice().createDescriptorSetLayout(create_infos[i]);
+    }
+
+    // Create the standard pipeline layout
+    vk::PipelineLayoutCreateInfo layout_info({}, descriptor_layouts);
+    pipeline_layout = g_vk_instace->GetDevice().createPipelineLayoutUnique(layout_info);
+
+    if (!CreateStaticSamplers())
+        return false;
+
+    // Create global texture staging buffer
+    texture_upload_buffer.Create(MAX_TEXTURE_UPLOAD_BUFFER_SIZE,
+        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+        vk::BufferUsageFlagBits::eTransferSrc);
+
+    return true;
+}
+
+vk::Sampler VKResourceCache::GetSampler(const SamplerInfo& info)
+{
+    auto iter = sampler_cache.find(info);
+    if (iter != sampler_cache.end()) {
+        return iter->second.get();
+    }
+
+    // Create texture sampler
+    auto properties = g_vk_instace->GetPhysicalDevice().getProperties();
+    auto features = g_vk_instace->GetPhysicalDevice().getFeatures();
+    vk::SamplerCreateInfo sampler_info(
+        {},
+        info.mag_filter,
+        info.min_filter,
+        info.mipmap_mode,
+        info.wrapping[0], info.wrapping[1], info.wrapping[2],
+        {},
+        features.samplerAnisotropy,
+        properties.limits.maxSamplerAnisotropy,
+        false,
+        vk::CompareOp::eAlways,
+        {},
+        {},
+        vk::BorderColor::eFloatTransparentBlack,
+        false);
+
+    auto sampler = g_vk_instace->GetDevice().createSamplerUnique(sampler_info);
+    vk::Sampler handle = sampler.get();
+
+    // Cache the sampler for future lookups
+    sampler_cache.emplace(info, std::move(sampler));
+    return handle;
+}
+
+vk::RenderPass VKResourceCache::GetRenderPass(vk::Format color_format, vk::Format depth_format,
+                                              u32 multisamples, vk::AttachmentLoadOp load_op)
+{
+    auto key = std::tie(color_format, depth_format, multisamples, load_op);
+    auto it = render_pass_cache.find(key);
+    if (it != render_pass_cache.end()) {
+        return it->second.get();
+    }
+
+    vk::SubpassDescription subpass({}, vk::PipelineBindPoint::eGraphics);
+    std::array<vk::AttachmentDescription, 2> attachments;
+    std::array<vk::AttachmentReference, 2> references;
+    u32 index = 0;
+
+    if (color_format != vk::Format::eUndefined) {
+        references[index] = vk::AttachmentReference{index, vk::ImageLayout::eColorAttachmentOptimal};
+        attachments[index] = {
+            {},
+            color_format,
+            static_cast<vk::SampleCountFlagBits>(multisamples),
+            load_op,
+            vk::AttachmentStoreOp::eStore,
+            vk::AttachmentLoadOp::eDontCare,
+            vk::AttachmentStoreOp::eDontCare,
+            vk::ImageLayout::eColorAttachmentOptimal,
+            vk::ImageLayout::eColorAttachmentOptimal
+        };
+
+        subpass.setColorAttachmentCount(1);
+        subpass.setPColorAttachments(&references[index++]);
+    }
+
+    if (depth_format != vk::Format::eUndefined) {
+        references[index] = vk::AttachmentReference{index, vk::ImageLayout::eDepthStencilAttachmentOptimal};
+        attachments[index] = {
+            {},
+            depth_format,
+            static_cast<vk::SampleCountFlagBits>(multisamples),
+            load_op,
+            vk::AttachmentStoreOp::eStore,
+            vk::AttachmentLoadOp::eDontCare,
+            vk::AttachmentStoreOp::eDontCare,
+            vk::ImageLayout::eDepthStencilAttachmentOptimal,
+            vk::ImageLayout::eDepthStencilAttachmentOptimal
+        };
+
+        subpass.setPDepthStencilAttachment(&references[index++]);
+    }
+
+    // Only pass the attachments that were actually filled in above
+    vk::RenderPassCreateInfo renderpass_info({}, index, attachments.data(), 1, &subpass);
+
+    auto renderpass = g_vk_instace->GetDevice().createRenderPassUnique(renderpass_info);
+    vk::RenderPass handle = renderpass.get();
+
+    render_pass_cache.emplace(key, std::move(renderpass));
+    return handle;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.h b/src/video_core/renderer_vulkan/vk_resource_cache.h
new file mode 100644
index 000000000..5cf8e08fc
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_cache.h
@@ -0,0 +1,58 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <vulkan/vulkan.hpp>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+
+namespace Vulkan {
+
+using RenderPassCacheKey = std::tuple<vk::Format, vk::Format, u32, vk::AttachmentLoadOp>;
+
+constexpr u32 MAX_TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u32 DESCRIPTOR_SET_LAYOUT_COUNT = 3;
+
+class VKResourceCache
+{
+public:
+    VKResourceCache() = default;
+    ~VKResourceCache();
+
+    // Performed at startup; creates descriptor layouts and compiles all static shaders.
+    bool Initialize();
+    void Shutdown();
+
+    // Public interface.
+    VKBuffer& GetTextureUploadBuffer() { return texture_upload_buffer; }
+    vk::Sampler GetSampler(const SamplerInfo& info);
+    vk::RenderPass GetRenderPass(vk::Format color_format, vk::Format depth_format, u32 multisamples, vk::AttachmentLoadOp load_op);
+    vk::PipelineCache GetPipelineCache() const { return pipeline_cache.get(); }
+
+private:
+    // Creates the static samplers used by the rasterizer (called from Initialize)
+    bool CreateStaticSamplers();
+
+    // Dummy image for samplers that are unbound
+    VKTexture dummy_texture;
+    VKBuffer texture_upload_buffer;
+
+    // Descriptor sets
+    std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_LAYOUT_COUNT> descriptor_layouts;
+    vk::UniquePipelineLayout pipeline_layout;
+
+    // Render pass cache
+    std::unordered_map<RenderPassCacheKey, vk::UniqueRenderPass> render_pass_cache;
+    std::unordered_map<SamplerInfo, vk::UniqueSampler> sampler_cache;
+
+    vk::UniquePipelineCache pipeline_cache;
+    std::string pipeline_cache_filename;
+};
+
+extern std::unique_ptr<VKResourceCache> g_object_cache;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
deleted file mode 100644
index f185b2355..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-#include <vector>
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_util.h"
-#include "video_core/renderer_opengl/gl_state.h"
-#include "video_core/renderer_opengl/gl_vars.h"
-
-MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
-
-namespace OpenGL {
-
-void OGLRenderbuffer::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenRenderbuffers(1, &handle);
-}
-
-void OGLRenderbuffer::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteRenderbuffers(1, &handle);
-    OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
-    handle = 0;
-}
-
-void OGLTexture::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenTextures(1, &handle);
-}
-
-void OGLTexture::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteTextures(1, &handle);
-    OpenGLState::GetCurState().ResetTexture(handle).Apply();
-    handle = 0;
-}
-
-void OGLTexture::Allocate(GLenum target, GLsizei levels, GLenum internalformat, GLenum format,
-                          GLenum type, GLsizei width, GLsizei height, GLsizei depth) {
-    const bool tex_storage = GLAD_GL_ARB_texture_storage || GLES;
-
-    switch (target) {
-    case GL_TEXTURE_1D:
-    case GL_TEXTURE:
-        if (tex_storage) {
-            glTexStorage1D(target, levels, internalformat, width);
-        } else {
-            for (GLsizei level{0}; level < levels; ++level) {
-                glTexImage1D(target, level, internalformat, width, 0, format, type, nullptr);
-                width >>= 1;
-            }
-        }
-        break;
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_1D_ARRAY:
-    case GL_TEXTURE_RECTANGLE:
-    case GL_TEXTURE_CUBE_MAP:
-        if (tex_storage) {
-            glTexStorage2D(target, levels, internalformat, width, height);
-        } else {
-            for (GLsizei level{0}; level < levels; ++level) {
-                glTexImage2D(target, level, internalformat, width, height, 0, format, type,
-                             nullptr);
-                width >>= 1;
-                if (target != GL_TEXTURE_1D_ARRAY)
-                    height >>= 1;
-            }
-        }
-        break;
-    case GL_TEXTURE_3D:
-    case GL_TEXTURE_2D_ARRAY:
-    case GL_TEXTURE_CUBE_MAP_ARRAY:
-        if
-            glTexStorage3D(target, levels, internalformat, width, height, depth);
-        } else {
-            for (GLsizei level{0}; level < levels; ++level) {
-                glTexImage3D(target, level, internalformat, width, height, depth, 0, format, type,
-                             nullptr);
-            }
-            width >>= 1;
-            height >>= 1;
-            if (target == GL_TEXTURE_3D)
-                depth >>= 1;
-        }
-        break;
-    }
-
-    if (!tex_storage) {
-        glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels - 1);
-    }
-}
-
-void OGLSampler::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenSamplers(1, &handle);
-}
-
-void OGLSampler::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteSamplers(1, &handle);
-    OpenGLState::GetCurState().ResetSampler(handle).Apply();
-    handle = 0;
-}
-
-void OGLShader::Create(const char* source, GLenum type) {
-    if (handle != 0)
-        return;
-    if (source == nullptr)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    handle = LoadShader(source, type);
-}
-
-void OGLShader::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteShader(handle);
-    handle = 0;
-}
-
-void OGLProgram::Create(bool separable_program, const std::vector<GLuint>& shaders) {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    handle = LoadProgram(separable_program, shaders);
-}
-
-void OGLProgram::Create(const char* vert_shader, const char* frag_shader) {
-    OGLShader vert, frag;
-    vert.Create(vert_shader, GL_VERTEX_SHADER);
-    frag.Create(frag_shader, GL_FRAGMENT_SHADER);
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    Create(false, {vert.handle, frag.handle});
-}
-
-void OGLProgram::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteProgram(handle);
-    OpenGLState::GetCurState().ResetProgram(handle).Apply();
-    handle = 0;
-}
-
-void OGLPipeline::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenProgramPipelines(1, &handle);
-}
-
-void OGLPipeline::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteProgramPipelines(1, &handle);
-    OpenGLState::GetCurState().ResetPipeline(handle).Apply();
-    handle = 0;
-}
-
-void OGLBuffer::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenBuffers(1, &handle);
-}
-
-void OGLBuffer::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteBuffers(1, &handle);
-    OpenGLState::GetCurState().ResetBuffer(handle).Apply();
-    handle = 0;
-}
-
-void OGLVertexArray::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenVertexArrays(1, &handle);
-}
-
-void OGLVertexArray::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteVertexArrays(1, &handle);
-    OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
-    handle = 0;
-}
-
-void OGLFramebuffer::Create() {
-    if (handle != 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenFramebuffers(1, &handle);
-}
-
-void OGLFramebuffer::Release() {
-    if (handle == 0)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteFramebuffers(1, &handle);
-    OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
-    handle = 0;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
deleted file mode 100644
index 4cbadf46c..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ /dev/null
@@ -1,243 +0,0 @@
-// Copyright 2022 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <memory>
-#include <glm/glm.hpp>
-#include <vulkan/vulkan.hpp>
-#include "common/common_types.h"
-
-namespace Vulkan {
-
-class VKContext;
-
-struct VertexInfo
-{
-    VertexInfo() = default;
-    VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) :
-        position(position), color(color), texcoords(coords) {};
-
-    glm::vec3 position;
-    glm::vec3 color;
-    glm::vec2 texcoords;
-};
-
-struct Vertex : public VertexInfo
-{
-    Vertex() = default;
-    Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {};
-    static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
-    static constexpr std::array<vk::VertexInputAttributeDescription, 3> attribute_desc =
-    {
-        vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)),
-        vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)),
-        vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)),
-    };
-};
-
-class VKBuffer : public NonCopyable, public Resource
-{
-    friend class VertexBuffer;
-public:
-    VKBuffer(std::shared_ptr<VKContext> context);
-    ~VKBuffer();
-
-    void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage);
-    void Bind(vk::CommandBuffer& command_buffer);
-
-    static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr<VKContext> context);
-    static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region);
-
-public:
-    void* memory = nullptr;
-    vk::UniqueBuffer buffer;
-    vk::UniqueDeviceMemory buffer_memory;
-    vk::UniqueBufferView buffer_view;
-    uint32_t size = 0;
-
-protected:
-    std::shared_ptr<VKContext> context;
-};
-
-class VKTexture : public NonCopyable, public Resource
-{
-    friend class VkContext;
-public:
-    VKTexture(const std::shared_ptr<VKContext>& context);
-    ~VKTexture() = default;
-
-    void Create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint);
-    void CopyPixels(uint8_t* pixels, uint32_t count);
-
-private:
-    void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
-
-private:
-    // Texture buffer
-    void* pixels = nullptr;
-    std::shared_ptr<VKContext> context;
-    uint32_t width = 0, height = 0, channels = 0;
-    VKBuffer staging;
-
-    // Texture objects
-    vk::UniqueImage texture;
-    vk::UniqueImageView texture_view;
-    vk::UniqueDeviceMemory texture_memory;
-    vk::UniqueSampler texture_sampler;
-    vk::Format format;
-};
-
-class OGLShader : private NonCopyable {
-public:
-    OGLShader() = default;
-
-    OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLShader() {
-        Release();
-    }
-
-    OGLShader& operator=(OGLShader&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    void Create(const char* source, GLenum type);
-
-    void Release();
-
-    GLuint handle = 0;
-};
-
-class OGLProgram : private NonCopyable {
-public:
-    OGLProgram() = default;
-
-    OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLProgram() {
-        Release();
-    }
-
-    OGLProgram& operator=(OGLProgram&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    /// Creates a new program from given shader objects
-    void Create(bool separable_program, const std::vector<GLuint>& shaders);
-
-    /// Creates a new program from given shader soruce code
-    void Create(const char* vert_shader, const char* frag_shader);
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
-class OGLPipeline : private NonCopyable {
-public:
-    OGLPipeline() = default;
-    OGLPipeline(OGLPipeline&& o) noexcept {
-        handle = std::exchange(o.handle, 0);
-    }
-    ~OGLPipeline() {
-        Release();
-    }
-    OGLPipeline& operator=(OGLPipeline&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
-class OGLBuffer : private NonCopyable {
-public:
-    OGLBuffer() = default;
-
-    OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLBuffer() {
-        Release();
-    }
-
-    OGLBuffer& operator=(OGLBuffer&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
-class OGLVertexArray : private NonCopyable {
-public:
-    OGLVertexArray() = default;
-
-    OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLVertexArray() {
-        Release();
-    }
-
-    OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
-class OGLFramebuffer : private NonCopyable {
-public:
-    OGLFramebuffer() = default;
-
-    OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLFramebuffer() {
-        Release();
-    }
-
-    OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
-        Release();
-        handle = std::exchange(o.handle, 0);
-        return *this;
-    }
-
-    /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_shader_state.h b/src/video_core/renderer_vulkan/vk_shader_state.h
new file mode 100644
index 000000000..f79446adf
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_shader_state.h
@@ -0,0 +1,235 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <functional>
+#include <optional>
+#include <string>
+#include <type_traits>
+#include "common/hash.h"
+#include "video_core/regs.h"
+#include "video_core/shader/shader.h"
+
+namespace Vulkan {
+
+enum class ProgramType : u32 { VS, GS, FS };
+
+enum Attributes {
+    ATTRIBUTE_POSITION,
+    ATTRIBUTE_COLOR,
+    ATTRIBUTE_TEXCOORD0,
+    ATTRIBUTE_TEXCOORD1,
+    ATTRIBUTE_TEXCOORD2,
+    ATTRIBUTE_TEXCOORD0_W,
+    ATTRIBUTE_NORMQUAT,
+    ATTRIBUTE_VIEW,
+};
+
+// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
+struct TevStageConfigRaw {
+    u32 sources_raw;
+    u32 modifiers_raw;
+    u32 ops_raw;
+    u32 scales_raw;
+    explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
+        Pica::TexturingRegs::TevStageConfig stage;
+        stage.sources_raw = sources_raw;
+        stage.modifiers_raw = modifiers_raw;
+        stage.ops_raw = ops_raw;
+        stage.const_color = 0;
+        stage.scales_raw = scales_raw;
+        return stage;
+    }
+};
+
+struct PicaFSConfigState {
+    Pica::FramebufferRegs::CompareFunc alpha_test_func;
+    Pica::RasterizerRegs::ScissorMode scissor_test_mode;
+    Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
+    bool texture2_use_coord1;
+    std::array<TevStageConfigRaw, 6> tev_stages;
+    u8 combiner_buffer_input;
+
+    Pica::RasterizerRegs::DepthBuffering depthmap_enable;
+    Pica::TexturingRegs::FogMode fog_mode;
+    bool fog_flip;
+    bool alphablend_enable;
+    Pica::FramebufferRegs::LogicOp logic_op;
+
+    struct {
+        struct {
+            unsigned num;
+            bool directional;
+            bool two_sided_diffuse;
+            bool dist_atten_enable;
+            bool spot_atten_enable;
+            bool geometric_factor_0;
+            bool geometric_factor_1;
+            bool shadow_enable;
+        } light[8];
+
+        bool enable;
+        unsigned src_num;
+        Pica::LightingRegs::LightingBumpMode bump_mode;
+        unsigned bump_selector;
+        bool bump_renorm;
+        bool clamp_highlights;
+
+        Pica::LightingRegs::LightingConfig config;
+        bool enable_primary_alpha;
+        bool enable_secondary_alpha;
+
+        bool enable_shadow;
+        bool shadow_primary;
+        bool shadow_secondary;
+        bool shadow_invert;
+        bool shadow_alpha;
+        unsigned shadow_selector;
+
+        struct {
+            bool enable;
+            bool abs_input;
+            Pica::LightingRegs::LightingLutInput type;
+            float scale;
+        } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
+    } lighting;
+
+    struct {
+        bool enable;
+        u32 coord;
+        Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
+        Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
+        bool separate_alpha;
+        bool noise_enable;
+        Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
+        u32 lut_width;
+        u32 lut_offset0;
+        u32 lut_offset1;
+        u32 lut_offset2;
+        u32 lut_offset3;
+        u32 lod_min;
+        u32 lod_max;
+        Pica::TexturingRegs::ProcTexFilter lut_filter;
+    } proctex;
+
+    bool shadow_rendering;
+    bool shadow_texture_orthographic;
+};
+
+/**
+ * This struct contains all state used to generate the fragment shader that emulates the
+ * current Pica register configuration, and serves as a cache key for generated shader
+ * programs. The shader generator should retrieve state from this struct only, never by
+ * directly accessing Pica registers. This reduces the risk of bugs where Pica state is
+ * not captured in the cache key, which would cause two different shaders to share a key.
+ */
+struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
+
+    /// Construct a PicaFSConfig with the given Pica register configuration.
+    static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
+
+    bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+        return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
+    }
+
+    bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+        return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
+    }
+};
+
+/**
+ * This struct contains common information to identify a Vulkan vertex/geometry shader
+ * generated from a PICA vertex/geometry shader.
+ */
+struct PicaShaderConfigCommon {
+    void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
+
+    u64 program_hash;
+    u64 swizzle_hash;
+    u32 main_offset;
+    bool sanitize_mul;
+
+    u32 num_outputs;
+
+    // output_map[output register index] -> output attribute index
+    std::array<u32, 16> output_map;
+};
+
+/**
+ * This struct contains information to identify a Vulkan vertex shader generated from a
+ * PICA vertex shader.
+ */
+struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
+    explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
+        state.Init(regs, setup);
+    }
+    explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
+        state = conf;
+    }
+};
+
+struct PicaGSConfigCommonRaw {
+    void Init(const Pica::Regs& regs);
+
+    u32 vs_output_attributes;
+    u32 gs_output_attributes;
+
+    struct SemanticMap {
+        u32 attribute_index;
+        u32 component_index;
+    };
+
+    // semantic_maps[semantic name] -> GS output attribute index + component index
+    std::array<SemanticMap, 24> semantic_maps;
+};
+
+/**
+ * This struct contains information to identify a Vulkan geometry shader generated from the
+ * PICA no-geometry-shader pipeline.
+ */
+struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
+    explicit PicaFixedGSConfig(const Pica::Regs& regs) {
+        state.Init(regs);
+    }
+};
+
+/**
+ * This struct combines the vertex and fragment shader states into a complete pipeline cache key.
+ */
+struct VKPipelineCacheKey {
+    VKPipelineCacheKey(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) :
+        vertex_config(regs.vs, setup), fragment_config(PicaFSConfig::BuildFromRegs(regs)) {}
+
+    PicaVSConfig vertex_config;
+    PicaFSConfig fragment_config;
+};
+
+} // namespace Vulkan
+
+namespace std {
+template <>
+struct hash<Vulkan::PicaFSConfig> {
+    std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+template <>
+struct hash<Vulkan::PicaVSConfig> {
+    std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+template <>
+struct hash<Vulkan::PicaFixedGSConfig> {
+    std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
+        return k.Hash();
+    }
+};
+} // namespace std
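Because each config wraps its state in Common::HashableStruct and specializes std::hash, these types can key a std::unordered_map directly. A minimal sketch of how a shader cache might use PicaVSConfig (CompileVertexShader is a hypothetical helper, not part of this patch, and HashableStruct is assumed to provide the operator== the map requires):

```cpp
#include <unordered_map>
#include <vulkan/vulkan.hpp>
#include "video_core/renderer_vulkan/vk_shader_state.h"

namespace Vulkan {

class VertexShaderCache {
public:
    vk::ShaderModule Get(const PicaVSConfig& config) {
        // Cache hit: this PICA shader setup hashes to an already compiled module.
        if (auto it = modules.find(config); it != modules.end()) {
            return it->second.get();
        }
        // Cache miss: compile once, keep the owning handle, hand out the raw one.
        vk::UniqueShaderModule module = CompileVertexShader(config);
        vk::ShaderModule handle = module.get();
        modules.emplace(config, std::move(module));
        return handle;
    }

private:
    vk::UniqueShaderModule CompileVertexShader(const PicaVSConfig& config); // not shown
    std::unordered_map<PicaVSConfig, vk::UniqueShaderModule> modules;
};

} // namespace Vulkan
```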
diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h
index 5c8d85b66..0a9caab34 100644
--- a/src/video_core/renderer_vulkan/vk_state.h
+++ b/src/video_core/renderer_vulkan/vk_state.h
@@ -41,6 +41,15 @@ constexpr uint ShadowTextureNZ = 6;
 class VulkanState {
 public:
+    struct Messenger {
+        bool cull_state;
+        bool depth_state;
+        bool color_mask;
+        bool stencil_state;
+        bool logic_op;
+        bool texture_state;
+    };
+
     struct {
         bool enabled;
         vk::CullModeFlags mode;
@@ -130,18 +139,8 @@ public:
         return cur_state;
     }
 
-    /// Apply this state as the current OpenGL state
-    void Apply() const;
-
-    /// Resets any references to the given resource
-    VulkanState& ResetTexture(GLuint handle);
-    VulkanState& ResetSampler(GLuint handle);
-    VulkanState& ResetProgram(GLuint handle);
-    VulkanState& ResetPipeline(GLuint handle);
-    VulkanState& ResetBuffer(GLuint handle);
-    VulkanState& ResetVertexArray(GLuint handle);
-    VulkanState& ResetFramebuffer(GLuint handle);
-    VulkanState& ResetRenderbuffer(GLuint handle);
+    /// Apply all dynamic state to the provided Vulkan command buffer
+    void Apply(vk::CommandBuffer& command_buffer) const;
 
 private:
     static VulkanState cur_state;
diff --git a/src/video_core/renderer_vulkan/vk_surface_params.cpp b/src/video_core/renderer_vulkan/vk_surface_params.cpp
new file mode 100644
index 000000000..a2c297c9a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_surface_params.cpp
@@ -0,0 +1,171 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/alignment.h"
+#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
+#include "video_core/renderer_vulkan/vk_surface_params.h"
+
+namespace Vulkan {
+
+SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
+    SurfaceParams params = *this;
+    const u32 tiled_size = is_tiled ? 8 : 1;
+    const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
+    PAddr aligned_start =
+        addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
+    PAddr aligned_end =
+        addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
+
+    if (aligned_end - aligned_start > stride_tiled_bytes) {
+        params.addr = aligned_start;
+        params.height = (aligned_end - aligned_start) / BytesInPixels(stride);
+    } else {
+        // 1 row
+        ASSERT(aligned_end - aligned_start == stride_tiled_bytes);
+        const u32 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1);
+        aligned_start =
+            addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment);
+        aligned_end =
+            addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment);
+        params.addr = aligned_start;
+        params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size;
+        params.stride = params.width;
+        params.height = tiled_size;
+    }
+    params.UpdateParams();
+
+    return params;
+}
+
+SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle<u32> unscaled_rect) const {
+    if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) {
+        return {};
+    }
+
+    if (is_tiled) {
+        unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8;
+        unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8;
+        unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8;
+        unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8;
+    }
+
+    const u32 stride_tiled = !is_tiled ? stride : stride * 8;
+
+    const u32 pixel_offset =
+        stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) +
+        unscaled_rect.left;
+
+    const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth();
+
+    return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)};
+}
+
+SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
+    SurfaceInterval result{};
+    const auto valid_regions =
+        SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
+    for (auto& valid_interval : valid_regions) {
+        const SurfaceInterval aligned_interval{
+            addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
+                                   BytesInPixels(is_tiled ? 8 * 8 : 1)),
+            addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
+                                     BytesInPixels(is_tiled ? 8 * 8 : 1))};
+
+        if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
+            boost::icl::length(aligned_interval) == 0) {
+            continue;
+        }
+
+        // Get the rectangle within aligned_interval
+        const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1);
+        SurfaceInterval rect_interval{
+            addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
+            addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
+        };
+        if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
+            // 1 row
+            rect_interval = aligned_interval;
+        } else if (boost::icl::length(rect_interval) == 0) {
+            // 2 rows that do not make a rectangle, return the larger one
+            const SurfaceInterval row1{boost::icl::first(aligned_interval),
+                                       boost::icl::first(rect_interval)};
+            const SurfaceInterval row2{boost::icl::first(rect_interval),
+                                       boost::icl::last_next(aligned_interval)};
+            rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
+        }
+
+        if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
+            result = rect_interval;
+        }
+    }
+    return result;
+}
+
+Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const {
+    const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr);
+
+    if (is_tiled) {
+        const int x0 = (begin_pixel_index % (stride * 8)) / 8;
+        const int y0 = (begin_pixel_index / (stride * 8)) * 8;
+        // Top to bottom
+        return Common::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
+                                      height - (y0 + sub_surface.height));
+    }
+
+    const int x0 = begin_pixel_index % stride;
+    const int y0 = begin_pixel_index / stride;
+    // Bottom to top
+    return Common::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
+}
+
+Common::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const {
+    auto rect = GetSubRect(sub_surface);
+    rect.left = rect.left * res_scale;
+    rect.right = rect.right * res_scale;
+    rect.top = rect.top * res_scale;
+    rect.bottom = rect.bottom * res_scale;
+    return rect;
+}
+
+bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
+    return std::tie(other_surface.addr, other_surface.width, other_surface.height,
+                    other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
+               std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+           pixel_format != PixelFormat::Invalid;
+}
+
+bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
+    return sub_surface.addr >= addr && sub_surface.end <= end &&
+           sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
+           sub_surface.is_tiled == is_tiled &&
+           (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
+           (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
+           GetSubRect(sub_surface).right <= stride;
+}
+
+bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
+    return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
+           addr <= expanded_surface.end && expanded_surface.addr <= end &&
+           is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+           (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
+                   BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
+               0;
+}
+
+bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
+    if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||
+        end < texcopy_params.end) {
+        return false;
+    }
+    if (texcopy_params.width != texcopy_params.stride) {
+        const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1));
+        return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
+               texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
+               (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
+               ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
+    }
+    return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_surface_params.h b/src/video_core/renderer_vulkan/vk_surface_params.h
new file mode 100644
index 000000000..b041cce57
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_surface_params.h
@@ -0,0 +1,270 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <climits>
+#include <memory>
+#include <boost/icl/interval.hpp>
+#include "common/assert.h"
+#include "common/math_util.h"
+#include "core/hw/gpu.h"
+#include "video_core/regs_framebuffer.h"
+#include "video_core/regs_texturing.h"
+
+namespace Vulkan {
+
+struct CachedSurface;
+using Surface = std::shared_ptr<CachedSurface>;
+
+using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
+
+struct SurfaceParams {
+private:
+    static constexpr std::array<unsigned int, 18> BPP_TABLE = {
+        32, // RGBA8
+        24, // RGB8
+        16, // RGB5A1
+        16, // RGB565
+        16, // RGBA4
+        16, // IA8
+        16, // RG8
+        8,  // I8
+        8,  // A8
+        8,  // IA4
+        4,  // I4
+        4,  // A4
+        4,  // ETC1
+        8,  // ETC1A4
+        16, // D16
+        0,
+        24, // D24
+        32, // D24S8
+    };
format"; + } + } + + static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { + return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) { + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) { + return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) + : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { + switch (format) { + // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat + case GPU::Regs::PixelFormat::RGB565: + return PixelFormat::RGB565; + case GPU::Regs::PixelFormat::RGB5A1: + return PixelFormat::RGB5A1; + default: + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + } + + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { + SurfaceType a_type = GetFormatType(pixel_format_a); + SurfaceType b_type = GetFormatType(pixel_format_b); + + if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && + (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + return true; + } + + if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { + return true; + } + + if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { + return true; + } + + return false; + } + + static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { + if ((unsigned int)pixel_format < 5) { + return SurfaceType::Color; + } + + if ((unsigned int)pixel_format < 14) { + return SurfaceType::Texture; + } + + if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { + return SurfaceType::Depth; + } + + if (pixel_format == PixelFormat::D24S8) { + return SurfaceType::DepthStencil; + } + + return SurfaceType::Invalid; + } + + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" + /// and "pixel_format" + void UpdateParams() { + if (stride == 0) { + stride = width; + } + type = GetFormatType(pixel_format); + size = !is_tiled ? 
+
+    SurfaceInterval GetInterval() const {
+        return SurfaceInterval(addr, end);
+    }
+
+    // Returns the outer rectangle containing "interval"
+    SurfaceParams FromInterval(SurfaceInterval interval) const;
+
+    SurfaceInterval GetSubRectInterval(Common::Rectangle<u32> unscaled_rect) const;
+
+    // Returns the region of the biggest valid rectangle within interval
+    SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
+
+    u32 GetScaledWidth() const {
+        return width * res_scale;
+    }
+
+    u32 GetScaledHeight() const {
+        return height * res_scale;
+    }
+
+    Common::Rectangle<u32> GetRect() const {
+        return {0, height, width, 0};
+    }
+
+    Common::Rectangle<u32> GetScaledRect() const {
+        return {0, GetScaledHeight(), GetScaledWidth(), 0};
+    }
+
+    u32 PixelsInBytes(u32 size) const {
+        return size * CHAR_BIT / GetFormatBpp(pixel_format);
+    }
+
+    u32 BytesInPixels(u32 pixels) const {
+        return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
+    }
+
+    bool ExactMatch(const SurfaceParams& other_surface) const;
+    bool CanSubRect(const SurfaceParams& sub_surface) const;
+    bool CanExpand(const SurfaceParams& expanded_surface) const;
+    bool CanTexCopy(const SurfaceParams& texcopy_params) const;
+
+    Common::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
+    Common::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
+
+    PAddr addr = 0;
+    PAddr end = 0;
+    u32 size = 0;
+
+    u32 width = 0;
+    u32 height = 0;
+    u32 stride = 0;
+    u16 res_scale = 1;
+
+    bool is_tiled = false;
+    PixelFormat pixel_format = PixelFormat::Invalid;
+    SurfaceType type = SurfaceType::Invalid;
+};
+
+} // namespace Vulkan
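To make the address math above concrete, here is a worked example (not part of the patch) for a tiled 256x192 RGB565 surface; the constants follow directly from BPP_TABLE and UpdateParams(), and the base address is hypothetical:

```cpp
#include "video_core/renderer_vulkan/vk_surface_params.h"

void SurfaceMathExample() {
    Vulkan::SurfaceParams params;
    params.addr = 0x18000000; // hypothetical base address in VRAM
    params.width = 256;
    params.height = 192;
    params.is_tiled = true;
    params.pixel_format = Vulkan::SurfaceParams::PixelFormat::RGB565;
    params.UpdateParams();

    // GetFormatBpp(RGB565) == 16, so BytesInPixels(n) == n * 2. With stride
    // defaulting to width, the tiled branch computes:
    //   size = BytesInPixels(256 * 8 * (192 / 8 - 1) + 256 * 8)
    //        = BytesInPixels(49152) = 98304 bytes (0x18000)
    // so params.end == 0x18018000 and GetInterval() covers [0x18000000, 0x18018000).
}
```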
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index ffaa00455..c2d3fbc48 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -72,7 +72,6 @@ private:
 public:
     // Window attributes
-    GLFWwindow* window;
     uint32_t width = 0, height = 0;
     bool framebuffer_resized = false;
     std::string_view name;
diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp
index 9399bc8cf..f9512ab8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture.cpp
@@ -1,16 +1,20 @@
-#include "vk_texture.h"
-#include "vk_buffer.h"
-#include "vk_context.h"
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
-VkTexture::VkTexture(const std::shared_ptr<VkContext>& context) :
-    context(context), staging(context)
-{
-}
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
 
-void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format format_)
+namespace Vulkan {
+
+void VKTexture::Create(const Info& info)
 {
-    auto& device = context->device;
-    format = format_; width = width_; height = height_;
+    auto& device = g_vk_instace->GetDevice();
+    format = info.format;
+    width = info.width;
+    height = info.height;
 
     switch (format)
     {
@@ -23,57 +27,73 @@ void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format f
         channels = 3;
         break;
     default:
-        throw std::runtime_error("[VK] Unknown texture format");
+        LOG_CRITICAL(Render_Vulkan, "Unknown texture format {}", format);
     }
 
-    int image_size = width * height * channels;
-    staging.create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+    // Create staging memory buffer for pixel transfers
+    u32 image_size = width * height * channels;
+    staging.Create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
                    vk::BufferUsageFlagBits::eTransferSrc);
     pixels = staging.memory;
 
     // Create the texture
+    vk::ImageCreateFlags flags = info.view_type == vk::ImageViewType::eCube ?
+        vk::ImageCreateFlagBits::eCubeCompatible : vk::ImageCreateFlags{};
     vk::ImageCreateInfo image_info
     (
-        {},
-        type,
+        flags,
+        info.type,
         format,
-        { width, height, 1 }, 1, 1,
+        { width, height, 1 }, info.mipmap_levels, info.array_layers,
        vk::SampleCountFlagBits::e1,
        vk::ImageTiling::eOptimal,
        vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled
     );
 
-    texture = device->createImageUnique(image_info);
+    texture = device.createImageUnique(image_info);
 
     // Create texture memory
-    auto requirements = device->getImageMemoryRequirements(texture.get());
-    auto memory_index = Buffer::find_memory_type(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal, context);
+    auto requirements = device.getImageMemoryRequirements(texture.get());
+    auto memory_index = VKBuffer::FindMemoryType(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal);
 
     vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
-    texture_memory = device->allocateMemoryUnique(alloc_info);
-    device->bindImageMemory(texture.get(), texture_memory.get(), 0);
+    texture_memory = device.allocateMemoryUnique(alloc_info);
+    device.bindImageMemory(texture.get(), texture_memory.get(), 0);
 
     // Create texture view
-    vk::ImageViewCreateInfo view_info({}, texture.get(), vk::ImageViewType::e1D, format, {},
-                                      vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
-    texture_view = device->createImageViewUnique(view_info);
+    vk::ImageViewCreateInfo view_info({}, texture.get(), info.view_type, format, {},
+                                      vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
+    texture_view = device.createImageViewUnique(view_info);
 
     // Create texture sampler
-    auto props = context->physical_device.getProperties();
-    vk::SamplerCreateInfo sampler_info({}, vk::Filter::eNearest, vk::Filter::eNearest, vk::SamplerMipmapMode::eNearest, vk::SamplerAddressMode::eClampToEdge,
-                                       vk::SamplerAddressMode::eClampToEdge, vk::SamplerAddressMode::eClampToEdge, {}, true, props.limits.maxSamplerAnisotropy,
-                                       false, vk::CompareOp::eAlways, {}, {}, vk::BorderColor::eIntOpaqueBlack, false);
+    auto properties = g_vk_instace->GetPhysicalDevice().getProperties();
+    vk::SamplerCreateInfo sampler_info
+    (
+        {},
+        info.sampler_info.mag_filter,
+        info.sampler_info.min_filter,
+        info.sampler_info.mipmap_mode,
+        info.sampler_info.wrapping[0], info.sampler_info.wrapping[1], info.sampler_info.wrapping[2],
+        {},
+        true,
+        properties.limits.maxSamplerAnisotropy,
+        false,
+        vk::CompareOp::eAlways,
+        {},
+        {},
+        vk::BorderColor::eIntOpaqueBlack,
+        false
+    );
 
-    texture_sampler = device->createSamplerUnique(sampler_info);
+    texture_sampler = device.createSamplerUnique(sampler_info);
 }
 
-void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
+void VKTexture::TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
 {
-    auto& device = context->device;
-    auto& queue = context->graphics_queue;
+    auto& device = g_vk_instace->GetDevice();
+    auto& queue = g_vk_instace->graphics_queue;
 
-    vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
-    vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
+    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
+    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
 
     command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
 
@@ -82,25 +102,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
     std::array<vk::ImageMemoryBarrier, 1> barriers = { barrier };
     vk::PipelineStageFlags source_stage, destination_stage;
 
-    if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal)
-    {
+    if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal) {
         barrier.srcAccessMask = vk::AccessFlagBits::eNone;
         barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite;
 
         source_stage = vk::PipelineStageFlagBits::eTopOfPipe;
         destination_stage = vk::PipelineStageFlagBits::eTransfer;
     }
-    else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal)
-    {
+    else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal) {
         barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
         barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
 
         source_stage = vk::PipelineStageFlagBits::eTransfer;
         destination_stage = vk::PipelineStageFlagBits::eFragmentShader;
     }
-    else
-    {
-        throw std::invalid_argument("[VK] Unsupported layout transition!");
+    else {
+        LOG_CRITICAL(Render_Vulkan, "Unsupported layout transition");
+        UNREACHABLE();
     }
 
     command_buffer.pipelineBarrier(source_stage, destination_stage, vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
@@ -110,23 +128,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
     queue.submit(submit_info, nullptr);
     queue.waitIdle();
 
-    device->freeCommandBuffers(context->command_pool.get(), command_buffer);
+    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
 }
 
-void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
+void VKTexture::CopyPixels(std::span<u8> new_pixels)
 {
-    auto& device = context->device;
-    auto& queue = context->graphics_queue;
+    auto& device = g_vk_instace->GetDevice();
+    auto& queue = g_vk_instace->graphics_queue;
 
     // Transition image to transfer format
-    transition_layout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);
+    TransitionLayout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);
 
     // Copy pixels to staging buffer
-    std::memcpy(pixels, new_pixels, count * channels);
+    std::memcpy(pixels, new_pixels.data(), new_pixels.size() * channels);
 
     // Copy the staging buffer to the image
-    vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
-    vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
+    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
+    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
 
     command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
 
@@ -140,8 +158,10 @@ void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
     queue.submit(submit_info, nullptr);
     queue.waitIdle();
 
-    device->freeCommandBuffers(context->command_pool.get(), command_buffer);
+    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
 
     // Prepare for shader reads
-    transition_layout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
+    TransitionLayout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h
index cabfe27f8..c91c31966 100644
--- a/src/video_core/renderer_vulkan/vk_texture.h
+++ b/src/video_core/renderer_vulkan/vk_texture.h
@@ -1,29 +1,55 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #pragma once
-#include "vk_buffer.h"
+
 #include <span>
 #include <vulkan/vulkan.hpp>
+#include "video_core/renderer_vulkan/vk_buffer.h"
 
-class VkContext;
+namespace Vulkan {
 
-class VkTexture : public NonCopyable, public Resource
-{
-    friend class VkContext;
+struct SamplerInfo {
+    std::array<vk::SamplerAddressMode, 3> wrapping = { vk::SamplerAddressMode::eClampToEdge,
+                                                       vk::SamplerAddressMode::eClampToEdge,
+                                                       vk::SamplerAddressMode::eClampToEdge };
+    vk::Filter min_filter = vk::Filter::eLinear;
+    vk::Filter mag_filter = vk::Filter::eLinear;
+    vk::SamplerMipmapMode mipmap_mode = vk::SamplerMipmapMode::eLinear;
+};
+
+/// Vulkan texture object
+class VKTexture final : public NonCopyable {
 public:
+    /// Information for the creation of the target texture
+    struct Info {
+        u32 width, height;
+        vk::Format format;
+        vk::ImageType type;
+        vk::ImageViewType view_type;
+        u32 mipmap_levels = 1;
+        u32 array_layers = 1;
+        SamplerInfo sampler_info = {};
+    };
 
-    VkTexture(const std::shared_ptr<VkContext>& context);
-    ~VkTexture() = default;
-    void create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint);
-    void copy_pixels(std::span<u8> pixels);
+    VKTexture() = default;
+    VKTexture(VKTexture&&) = default;
+    ~VKTexture() = default;
+
+    /// Create a new Vulkan texture object along with its sampler
+    void Create(const Info& info);
+
+    /// Copies CPU side pixel data to the GPU texture buffer
+    void CopyPixels(std::span<u8> pixels);
 
 private:
-    void transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
+    /// Used to transition the image to an optimal layout during transfers
+    void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
 
 private:
     // Texture buffer
     void* pixels = nullptr;
-    std::shared_ptr<VkContext> context;
     uint32_t width = 0, height = 0, channels = 0;
-    Buffer staging;
+    VKBuffer staging;
 
     // Texture objects
     vk::UniqueImage texture;
@@ -32,3 +58,24 @@ private:
     vk::UniqueImageView texture_view;
     vk::UniqueDeviceMemory texture_memory;
     vk::UniqueSampler texture_sampler;
     vk::Format format;
 };
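As a usage sketch (not part of the patch), creating a 2D texture through Info and uploading CPU-side pixels could look like this, assuming the global Vulkan instance is already initialized, C++20 designated initializers are available, and eR8G8B8A8Uint is one of the formats the channel switch in Create() recognizes:

```cpp
#include <cstdint>
#include <vector>
#include "video_core/renderer_vulkan/vk_texture.h"

void UploadTestTexture() {
    // 64x64 opaque white RGBA8 image
    std::vector<uint8_t> image(64 * 64 * 4, 0xFF);

    Vulkan::VKTexture texture;
    texture.Create(Vulkan::VKTexture::Info{
        .width = 64,
        .height = 64,
        .format = vk::Format::eR8G8B8A8Uint,
        .type = vk::ImageType::e2D,
        .view_type = vk::ImageViewType::e2D,
    });

    // Staged through the internal host-visible buffer, then transitioned to
    // eShaderReadOnlyOptimal for sampling.
    texture.CopyPixels(image);
}
```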
+
+/// Vulkan framebuffer object similar to an FBO in OpenGL
+class VKFramebuffer final : public NonCopyable {
+public:
+    VKFramebuffer() = default;
+    ~VKFramebuffer() = default;
+
+    // Create Vulkan framebuffer object
+    void Create(u32 width, u32 height, u32 layers, u32 samples);
+
+    vk::Rect2D GetRect() const { return vk::Rect2D{{0, 0}, {width, height}}; }
+
+private:
+    u32 width, height;
+    vk::UniqueFramebuffer framebuffer;
+    vk::RenderPass load_renderpass;
+    vk::RenderPass discard_renderpass;
+    vk::RenderPass clear_renderpass;
+};
+
+} // namespace Vulkan
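Both TransitionLayout() and CopyPixels() above repeat the same allocate/begin/submit/waitIdle/free sequence on the graphics queue. A possible follow-up, sketched here under the assumption that g_vk_instace keeps exposing the same GetDevice(), graphics_queue and command_pool members used in the patch (SubmitOneShot itself is a hypothetical helper):

```cpp
#include <functional>
#include <vulkan/vulkan.hpp>
#include "video_core/renderer_vulkan/vk_instance.h"

namespace Vulkan {

void SubmitOneShot(const std::function<void(vk::CommandBuffer)>& record) {
    auto& device = g_vk_instace->GetDevice();
    auto& queue = g_vk_instace->graphics_queue;

    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(),
                                             vk::CommandBufferLevel::ePrimary, 1);
    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];

    command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
    record(command_buffer);
    command_buffer.end();

    vk::SubmitInfo submit_info({}, {}, command_buffer);
    queue.submit(submit_info, nullptr);
    queue.waitIdle(); // blocking; acceptable for one-off uploads, not per-frame work

    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
}

} // namespace Vulkan
```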