diff --git a/src/common/common_types.h b/src/common/common_types.h
index ee18eac81..2d723ec5a 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -56,6 +56,9 @@ protected:
     constexpr NonCopyable() = default;
     ~NonCopyable() = default;
 
+    // Enable std::move operations
+    NonCopyable(NonCopyable&&) = default;
+
     NonCopyable(const NonCopyable&) = delete;
     NonCopyable& operator=(const NonCopyable&) = delete;
 };
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d5796f468..fb808b1ee 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -73,12 +73,23 @@ add_library(video_core STATIC
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/vk_buffer.cpp
     renderer_vulkan/vk_buffer.h
-    renderer_vulkan/vk_context.cpp
-    renderer_vulkan/vk_context.h
-    renderer_vulkan/vk_resource_manager.cpp
-    renderer_vulkan/vk_resource_manager.h
+    renderer_vulkan/vk_instance.cpp
+    renderer_vulkan/vk_instance.h
+    renderer_vulkan/vk_resource_cache.cpp
+    renderer_vulkan/vk_resource_cache.h
+    renderer_vulkan/vk_rasterizer_cache.cpp
+    renderer_vulkan/vk_rasterizer_cache.h
+    renderer_vulkan/vk_rasterizer.cpp
+    renderer_vulkan/vk_rasterizer.h
+    renderer_vulkan/vk_pipeline.cpp
+    renderer_vulkan/vk_pipeline.h
+    renderer_vulkan/vk_pipeline_manager.h
+    renderer_vulkan/vk_pipeline_manager.cpp
+    renderer_vulkan/vk_shader_state.h
     renderer_vulkan/vk_state.cpp
     renderer_vulkan/vk_state.h
+    renderer_vulkan/vk_surface_params.cpp
+    renderer_vulkan/vk_surface_params.h
     renderer_vulkan/vk_swapchain.cpp
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture.cpp
diff --git a/src/video_core/renderer_vulkan/vk_buffer.cpp b/src/video_core/renderer_vulkan/vk_buffer.cpp
index a15d4310d..8a791dc49 100644
--- a/src/video_core/renderer_vulkan/vk_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer.cpp
@@ -1,64 +1,59 @@
-#include "vk_buffer.h"
-#include "vk_context.h"
-#include
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
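+//
+// Usage sketch (editorial illustration, not part of this patch; the buffer names,
+// `data` and `size` are hypothetical): a typical staging upload with the VKBuffer
+// interface below. Note that the defaulted move constructor leaves the mapped
+// `memory` pointer set in the moved-from object, so a moved-from buffer should not
+// be allowed to reach ~VKBuffer while still mapped.
+//
+//     VKBuffer staging, local;
+//     staging.Create(size, vk::MemoryPropertyFlagBits::eHostVisible |
+//                              vk::MemoryPropertyFlagBits::eHostCoherent,
+//                    vk::BufferUsageFlagBits::eTransferSrc);
+//     local.Create(size, vk::MemoryPropertyFlagBits::eDeviceLocal,
+//                  vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eVertexBuffer);
+//     std::memcpy(staging.memory, data, size); // Create() maps host-visible buffers
+//     VKBuffer::CopyBuffer(staging, local, {0, 0, size});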
+ +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_buffer.h" +#include "video_core/renderer_vulkan/vk_instance.h" #include #include #include -Buffer::Buffer(std::shared_ptr context) : - context(context) +namespace Vulkan { + +VKBuffer::~VKBuffer() { + if (memory != nullptr) { + g_vk_instace->GetDevice().unmapMemory(buffer_memory.get()); + } } -Buffer::~Buffer() +void VKBuffer::Create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, vk::Format view_format) { - auto& device = context->device; - if (memory != nullptr) - device->unmapMemory(buffer_memory.get()); -} - -void Buffer::create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage) -{ - auto& device = context->device; + auto& device = g_vk_instace->GetDevice(); size = byte_count; vk::BufferCreateInfo bufferInfo({}, byte_count, usage); - buffer = device->createBufferUnique(bufferInfo); + buffer = device.createBufferUnique(bufferInfo); - auto mem_requirements = device->getBufferMemoryRequirements(buffer.get()); + auto mem_requirements = device.getBufferMemoryRequirements(buffer.get()); - auto memory_type_index = find_memory_type(mem_requirements.memoryTypeBits, properties, context); + auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, properties); vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index); - buffer_memory = device->allocateMemoryUnique(alloc_info); - device->bindBufferMemory(buffer.get(), buffer_memory.get(), 0); + buffer_memory = device.allocateMemoryUnique(alloc_info); + device.bindBufferMemory(buffer.get(), buffer_memory.get(), 0); // Optionally map the buffer to CPU memory if (properties & vk::MemoryPropertyFlagBits::eHostVisible) - memory = device->mapMemory(buffer_memory.get(), 0, byte_count); + memory = device.mapMemory(buffer_memory.get(), 0, byte_count); // Create buffer view for texel buffers if (usage & vk::BufferUsageFlagBits::eStorageTexelBuffer || usage & vk::BufferUsageFlagBits::eUniformTexelBuffer) { - vk::BufferViewCreateInfo view_info({}, buffer.get(), vk::Format::eR32Uint, 0, byte_count); - buffer_view = device->createBufferViewUnique(view_info); + vk::BufferViewCreateInfo view_info({}, buffer.get(), view_format, 0, byte_count); + buffer_view = device.createBufferViewUnique(view_info); } } -void Buffer::bind(vk::CommandBuffer& command_buffer) +void VKBuffer::CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region) { - vk::DeviceSize offsets[1] = { 0 }; - command_buffer.bindVertexBuffers(0, 1, &buffer.get(), offsets); -} + auto& device = g_vk_instace->GetDevice(); + auto& queue = g_vk_instace->graphics_queue; -void Buffer::copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region) -{ - auto& context = src_buffer.context; - auto& device = context->device; - auto& queue = context->graphics_queue; - - vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1); - vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0]; + vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1); + vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0]; command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); command_buffer.copyBuffer(src_buffer.buffer.get(), dst_buffer.buffer.get(), region); @@ -68,12 +63,12 @@ void Buffer::copy_buffer(Buffer& 
src_buffer, Buffer& dst_buffer, const vk::Buffe queue.submit(submit_info, nullptr); queue.waitIdle(); - device->freeCommandBuffers(context->command_pool.get(), command_buffer); + device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer); } -uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr context) +uint32_t VKBuffer::FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties) { - vk::PhysicalDeviceMemoryProperties mem_properties = context->physical_device.getMemoryProperties(); + vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties(); for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { @@ -82,32 +77,8 @@ uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags return i; } - throw std::runtime_error("[VK] Failed to find suitable memory type!"); + LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type."); + UNREACHABLE(); } -VertexBuffer::VertexBuffer(const std::shared_ptr& context) : - host(context), local(context), context(context) -{ -} - -void VertexBuffer::create(uint32_t vertex_count) -{ - // Create a host and local buffer - auto byte_count = sizeof(Vertex) * vertex_count; - local.create(byte_count, vk::MemoryPropertyFlagBits::eDeviceLocal, - vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eVertexBuffer); - host.create(byte_count, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, - vk::BufferUsageFlagBits::eTransferSrc); -} - -void VertexBuffer::copy_vertices(Vertex* vertices, uint32_t count) -{ - auto byte_count = count * sizeof(Vertex); - std::memcpy(host.memory, vertices, byte_count); - Buffer::copy_buffer(host, local, { 0, 0, byte_count }); -} - -void VertexBuffer::bind(vk::CommandBuffer& command_buffer) -{ - local.bind(command_buffer); } diff --git a/src/video_core/renderer_vulkan/vk_buffer.h b/src/video_core/renderer_vulkan/vk_buffer.h index cd82a251e..359da7a0e 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.h +++ b/src/video_core/renderer_vulkan/vk_buffer.h @@ -1,48 +1,28 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
 #pragma once
-#include
-#include
+
 #include
 #include
+#include
 #include "common/common_types.h"
 
-class VkContext;
+namespace Vulkan {
 
-struct VertexInfo
-{
-    VertexInfo() = default;
-    VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) :
-        position(position), color(color), texcoords(coords) {};
-
-    glm::vec3 position;
-    glm::vec3 color;
-    glm::vec2 texcoords;
-};
-
-struct Vertex : public VertexInfo
-{
-    Vertex() = default;
-    Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {};
-    static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
-    static constexpr std::array attribute_desc =
-    {
-        vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)),
-        vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)),
-        vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)),
-    };
-};
-
-class Buffer : public NonCopyable, public Resource
-{
-    friend class VertexBuffer;
+/// Generic Vulkan buffer object used by almost every resource
+class VKBuffer final : public NonCopyable {
 public:
-    Buffer(std::shared_ptr context);
-    ~Buffer();
+    VKBuffer() = default;
+    VKBuffer(VKBuffer&&) = default;
+    ~VKBuffer();
 
-    void create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage);
-    void bind(vk::CommandBuffer& command_buffer);
+    /// Create a generic Vulkan buffer object
+    void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage,
+                vk::Format view_format = vk::Format::eUndefined);
 
-    static uint32_t find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr context);
-    static void copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region);
+    static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties);
+    static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region);
 
 public:
     void* memory = nullptr;
@@ -50,22 +30,6 @@ public:
     vk::UniqueDeviceMemory buffer_memory;
     vk::UniqueBufferView buffer_view;
     uint32_t size = 0;
-
-protected:
-    std::shared_ptr context;
 };
 
-class VertexBuffer
-{
-public:
-    VertexBuffer(const std::shared_ptr& context);
-    ~VertexBuffer() = default;
-
-    void create(uint32_t vertex_count);
-    void copy_vertices(Vertex* vertices, uint32_t count);
-    void bind(vk::CommandBuffer& command_buffer);
-
-private:
-    Buffer host, local;
-    std::shared_ptr context;
-};
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_context.h b/src/video_core/renderer_vulkan/vk_context.h
deleted file mode 100644
index 1e4d576bd..000000000
--- a/src/video_core/renderer_vulkan/vk_context.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-#include "vk_swapchain.h"
-#include
-#include
-
-class VkWindow;
-class VkContext;
-
-constexpr int MAX_BINDING_COUNT = 10;
-
-struct PipelineLayoutInfo
-{
-    friend class VkContext;
-    PipelineLayoutInfo(const std::shared_ptr& context);
-    ~PipelineLayoutInfo();
-
-    void add_shader_module(std::string_view filepath, vk::ShaderStageFlagBits stage);
-    void add_resource(Resource* resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int binding, int group = 0);
-
-private:
-    using DescInfo = std::pair, std::vector>;
-
-    std::shared_ptr context;
-    std::unordered_map resource_types;
-    std::unordered_map needed;
-    std::vector shader_stages;
-};
-
-class VkTexture;
-
-// The vulkan context. Can only be created by the window
-class VkContext
-{
-    friend class VkWindow;
-public:
-    VkContext(vk::UniqueInstance&& instance, VkWindow* window);
-    ~VkContext();
-
-    void create(SwapchainInfo& info);
-    void create_graphics_pipeline(PipelineLayoutInfo& info);
-
-    vk::CommandBuffer& get_command_buffer();
-
-private:
-    void create_devices(int device_id = 0);
-    void create_renderpass();
-    void create_command_buffers();
-    void create_decriptor_sets(PipelineLayoutInfo& info);
-
-public:
-    // Queue family indexes
-    uint32_t queue_family = -1;
-
-    // Core vulkan objects
-    vk::UniqueInstance instance;
-    vk::PhysicalDevice physical_device;
-    vk::UniqueDevice device;
-    vk::Queue graphics_queue;
-
-    // Pipeline
-    vk::UniquePipelineLayout pipeline_layout;
-    vk::UniquePipeline graphics_pipeline;
-    vk::UniqueRenderPass renderpass;
-    vk::UniqueDescriptorPool descriptor_pool;
-    std::array, MAX_FRAMES_IN_FLIGHT> descriptor_layouts;
-    std::array, MAX_FRAMES_IN_FLIGHT> descriptor_sets;
-
-    // Command buffer
-    vk::UniqueCommandPool command_pool;
-    std::vector command_buffers;
-
-    // Window
-    VkWindow* window;
-    SwapchainInfo swapchain_info;
-};
diff --git a/src/video_core/renderer_vulkan/vk_context.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
similarity index 100%
rename from src/video_core/renderer_vulkan/vk_context.cpp
rename to src/video_core/renderer_vulkan/vk_instance.cpp
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
new file mode 100644
index 000000000..70900f458
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -0,0 +1,72 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
+// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
+// number, but 9 swap textures at 60FPS presentation allows for 800% speed so that's probably fine
+#ifdef ANDROID
+// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
+constexpr std::size_t SWAP_CHAIN_SIZE = 6;
+#else
+constexpr std::size_t SWAP_CHAIN_SIZE = 9;
+#endif
+
+/// The global Vulkan instance
+class VKInstance
+{
+public:
+    VKInstance() = default;
+    ~VKInstance();
+
+    /// Construct global Vulkan context
+    void Create(vk::UniqueInstance instance, vk::PhysicalDevice gpu, vk::UniqueSurfaceKHR surface,
+                bool enable_debug_reports, bool enable_validation_layer);
+
+    vk::Device& GetDevice() { return device.get(); }
+    vk::PhysicalDevice& GetPhysicalDevice() { return physical_device; }
+
+    /// Get a valid command buffer for the current frame
+    vk::CommandBuffer& GetCommandBuffer();
+
+    /// Feature support
+    bool SupportsAnisotropicFiltering() const;
+
+private:
+    void CreateDevices(int device_id = 0);
+    void CreateRenderpass();
+    void CreateCommandBuffers();
+
+public:
+    // Queue family indexes
+    u32 queue_family = -1;
+
+    // Core vulkan objects
+    vk::UniqueInstance instance;
+    vk::PhysicalDevice physical_device;
+    vk::UniqueDevice device;
+    vk::Queue graphics_queue;
+
+    // Pipeline
+    vk::UniqueDescriptorPool descriptor_pool;
+    std::array, SWAP_CHAIN_SIZE> descriptor_layouts;
+    std::array, SWAP_CHAIN_SIZE> descriptor_sets;
+
+    // Command buffer
+    vk::UniqueCommandPool command_pool;
+    std::vector<vk::CommandBuffer> command_buffers;
+};
+
+extern std::unique_ptr<VKInstance> g_vk_instance;
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.cpp b/src/video_core/renderer_vulkan/vk_pipeline.cpp
new file mode 100644
index 000000000..6532b2ba1
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline.cpp
@@ -0,0 +1,61 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_pipeline.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include
+
+namespace Vulkan {
+
+shaderc::Compiler compiler;
+
+void VKPipeline::Info::AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage)
+{
+    shaderc_shader_kind shader_stage;
+    std::string name;
+    switch (stage)
+    {
+    case vk::ShaderStageFlagBits::eVertex:
+        shader_stage = shaderc_glsl_vertex_shader;
+        name = "Vertex shader";
+        break;
+    case vk::ShaderStageFlagBits::eCompute:
+        shader_stage = shaderc_glsl_compute_shader;
+        name = "Compute shader";
+        break;
+    case vk::ShaderStageFlagBits::eFragment:
+        shader_stage = shaderc_glsl_fragment_shader;
+        name = "Fragment shader";
+        break;
+    default:
+        LOG_CRITICAL(Render_Vulkan, "Unknown Vulkan shader stage {}", stage);
+        UNREACHABLE();
+    }
+
+    shaderc::CompileOptions options;
+    options.SetOptimizationLevel(shaderc_optimization_level_performance);
+    options.SetAutoBindUniforms(true);
+    options.SetAutoMapLocations(true);
+    options.SetTargetEnvironment(shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2);
+
+    auto result = compiler.CompileGlslToSpv(source.c_str(), shader_stage, name.c_str(), options);
+    if (result.GetCompilationStatus() != shaderc_compilation_status_success) {
+        LOG_CRITICAL(Render_Vulkan, "Failed to compile GLSL shader with error: {}", result.GetErrorMessage());
+        UNREACHABLE();
+    }
+
+    auto shader_code = std::vector<u32>{ result.cbegin(), result.cend() };
+
+    vk::ShaderModuleCreateInfo module_info({}, shader_code);
+    auto module = g_vk_instance->GetDevice().createShaderModuleUnique(module_info);
+    shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module.get(), "main");
+
+    // Retain the module; the stage info above only stores a non-owning handle
+    shader_modules.emplace_back(std::move(module));
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h
new file mode 100644
index 000000000..1e9bac825
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline.h
@@ -0,0 +1,49 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include "video_core/renderer_vulkan/vk_buffer.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+
+namespace Vulkan {
+
+using Resource = std::variant;
+
+/// Vulkan pipeline objects represent a collection of shader modules
+class VKPipeline final : private NonCopyable {
+public:
+    /// Includes all required information to build a Vulkan pipeline object
+    class Info : private NonCopyable {
+    public:
+        Info() = default;
+        ~Info() = default;
+
+        /// Compile the provided GLSL source and assign it to a specific stage
+        void AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage);
+
+        /// Add a texture or a buffer to the target descriptor set
+        void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
+
+    private:
+        using ResourceInfo = std::pair, vk::DescriptorSetLayoutBinding>;
+
+        std::unordered_map> descriptor_sets;
+        std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
+        // Keeps compiled shader modules alive until the pipeline is created
+        std::vector<vk::UniqueShaderModule> shader_modules;
+    };
+
+    VKPipeline() = default;
+    ~VKPipeline() = default;
+
+    /// Create a new Vulkan pipeline object
+    void Create(const Info& info);
+    void Create(vk::PipelineLayoutCreateInfo layout_info);
+
+private:
+    vk::UniquePipeline pipeline;
+    vk::UniquePipelineLayout pipeline_layout;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp b/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp
new file mode 100644
index 000000000..a6bdd5e78
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_manager.cpp
@@ -0,0 +1,668 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
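+//
+// Sketch of how the VKPipeline::Info interface declared in vk_pipeline.h is
+// meant to be driven (editorial illustration only; the GLSL source strings and
+// the uniform buffer are hypothetical, and the Resource variant alternatives
+// are elided upstream):
+//
+//     VKPipeline::Info info;
+//     info.AddShaderModule(vertex_glsl_source, vk::ShaderStageFlagBits::eVertex);
+//     info.AddShaderModule(fragment_glsl_source, vk::ShaderStageFlagBits::eFragment);
+//     info.AddResource(uniform_buffer, vk::DescriptorType::eUniformBuffer,
+//                      vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment);
+//
+//     VKPipeline pipeline;
+//     pipeline.Create(info);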
+ +#include "core/core.h" +#include "video_core/video_core.h" +#include "core/frontend/scope_acquire_context.h" +#include "video_core/renderer_vulkan/vk_pipeline_manager.h" +#include +#include +#include +#include +#include + +namespace Vulkan { + +static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) { + std::size_t hash = 0; + u64 regs_uid = Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32)); + boost::hash_combine(hash, regs_uid); + if (code.size() > 0) { + u64 code_uid = Common::ComputeHash64(code.data(), code.size() * sizeof(u32)); + boost::hash_combine(hash, code_uid); + } + return static_cast(hash); +} + +static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, + const std::set& supported_formats, + bool separable) { + + if (supported_formats.find(dump.binary_format) == supported_formats.end()) { + LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); + return {}; + } + + auto shader = OGLProgram(); + shader.handle = glCreateProgram(); + if (separable) { + glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(), + static_cast(dump.binary.size())); + + GLint link_status{}; + glGetProgramiv(shader.handle, GL_LINK_STATUS, &link_status); + if (link_status == GL_FALSE) { + LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); + return {}; + } + + return shader; +} + +static std::set GetSupportedFormats() { + std::set supported_formats; + + GLint num_formats{}; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + for (const GLint format : formats) + supported_formats.insert(static_cast(format)); + return supported_formats; +} + +static std::tuple BuildVSConfigFromRaw( + const ShaderDiskCacheRaw& raw) { + Pica::Shader::ProgramCode program_code{}; + Pica::Shader::SwizzleData swizzle_data{}; + std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + program_code.begin()); + std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH, + Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); + Pica::Shader::ShaderSetup setup; + setup.program_code = program_code; + setup.swizzle_data = swizzle_data; + return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup}; +} + +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + std::size_t expected_size) { + const GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index == GL_INVALID_INDEX) { + return; + } + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! 
Got {}, expected {}", + static_cast(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); +} + +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, + sizeof(UniformData)); + SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, sizeof(VSUniformData)); +} + +static void SetShaderSamplerBinding(GLuint shader, const char* name, + TextureUnits::TextureUnit binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, binding.id); + } +} + +static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, static_cast(binding)); + } +} + +static void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0)); + SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1)); + SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2)); + SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); + + // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); + + SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); + SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); + SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX); + SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY); + SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY); + SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ); + SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ); + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, + const Pica::Shader::ShaderSetup& setup) { + std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), + [](bool value) -> BoolAligned { return {value ? 
GL_TRUE : GL_FALSE}; }); + std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), + [](const auto& value) -> GLuvec4 { + return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; + }); + std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), + [](const auto& value) -> GLvec4 { + return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), + value.w.ToFloat32()}; + }); +} + +template +class ShaderCache { +public: + explicit ShaderCache(bool separable) : separable(separable) {} + std::tuple> Get( + const KeyConfigType& config) { + auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable}); + OGLShaderStage& cached_shader = iter->second; + std::optional result{}; + if (new_shader) { + result = CodeGenerator(config, separable); + cached_shader.Create(result->code.c_str(), ShaderType); + } + return {cached_shader.GetHandle(), std::move(result)}; + } + + void Inject(const KeyConfigType& key, OGLProgram&& program) { + OGLShaderStage stage{separable}; + stage.Inject(std::move(program)); + shaders.emplace(key, std::move(stage)); + } + + void Inject(const KeyConfigType& key, OGLShaderStage&& stage) { + shaders.emplace(key, std::move(stage)); + } + +private: + bool separable; + std::unordered_map shaders; +}; + +// This is a cache designed for shaders translated from PICA shaders. The first cache matches the +// config structure like a normal cache does. On cache miss, the second cache matches the generated +// GLSL code. The configuration is like this because there might be leftover code in the PICA shader +// program buffer from the previous shader, which is hashed into the config, resulting several +// different config values from the same shader program. 
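+//
+// In sketch form the two-level lookup below is:
+//
+//     shader_map:   PICA config    -> pointer into shader_cache (or null)
+//     shader_cache: generated GLSL -> compiled shader stage
+//
+// so two configs that happen to generate identical GLSL share one compiled stage.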
+template <typename KeyConfigType,
+          std::optional<ShaderDecompiler::ProgramResult> (*CodeGenerator)(
+              const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool),
+          GLenum ShaderType>
+class ShaderDoubleCache {
+public:
+    explicit ShaderDoubleCache(bool separable) : separable(separable) {}
+    std::tuple<GLuint, std::optional<ShaderDecompiler::ProgramResult>> Get(
+        const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) {
+        std::optional<ShaderDecompiler::ProgramResult> result{};
+        auto map_it = shader_map.find(key);
+        if (map_it == shader_map.end()) {
+            auto program_opt = CodeGenerator(setup, key, separable);
+            if (!program_opt) {
+                shader_map[key] = nullptr;
+                return {0, std::nullopt};
+            }
+
+            std::string& program = program_opt->code;
+            auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable});
+            OGLShaderStage& cached_shader = iter->second;
+            if (new_shader) {
+                result.emplace();
+                result->code = program;
+                cached_shader.Create(program.c_str(), ShaderType);
+            }
+            shader_map[key] = &cached_shader;
+            return {cached_shader.GetHandle(), std::move(result)};
+        }
+
+        if (map_it->second == nullptr) {
+            return {0, std::nullopt};
+        }
+
+        return {map_it->second->GetHandle(), std::nullopt};
+    }
+
+    void Inject(const KeyConfigType& key, std::string decomp, OGLProgram&& program) {
+        OGLShaderStage stage{separable};
+        stage.Inject(std::move(program));
+        const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first;
+        OGLShaderStage& cached_shader = iter->second;
+        shader_map.insert_or_assign(key, &cached_shader);
+    }
+
+    void Inject(const KeyConfigType& key, std::string decomp, OGLShaderStage&& stage) {
+        const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first;
+        OGLShaderStage& cached_shader = iter->second;
+        shader_map.insert_or_assign(key, &cached_shader);
+    }
+
+private:
+    bool separable;
+    std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
+    std::unordered_map<std::string, OGLShaderStage> shader_cache;
+};
+
+using ProgrammableVertexShaders =
+    ShaderDoubleCache<PicaVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
+
+using FixedGeometryShaders =
+    ShaderCache<PicaFixedGSConfig, &GenerateFixedGeometryShader, GL_GEOMETRY_SHADER>;
+
+using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
+
+class ShaderProgramManager::Impl {
+public:
+    explicit Impl(bool separable, bool is_amd)
+        : is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
+          trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
+          fragment_shaders(separable), disk_cache(separable) {
+        if (separable)
+            pipeline.Create();
+    }
+
+    struct ShaderTuple {
+        GLuint vs = 0;
+        GLuint gs = 0;
+        GLuint fs = 0;
+
+        std::size_t vs_hash = 0;
+        std::size_t gs_hash = 0;
+        std::size_t fs_hash = 0;
+
+        bool operator==(const ShaderTuple& rhs) const {
+            return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
+        }
+
+        bool operator!=(const ShaderTuple& rhs) const {
+            return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs);
+        }
+
+        std::size_t GetConfigHash() const {
+            std::size_t hash = 0;
+            boost::hash_combine(hash, vs_hash);
+            boost::hash_combine(hash, gs_hash);
+            boost::hash_combine(hash, fs_hash);
+            return hash;
+        }
+    };
+
+    bool is_amd;
+    bool separable;
+
+    ShaderTuple current;
+
+    ProgrammableVertexShaders programmable_vertex_shaders;
+    TrivialVertexShader trivial_vertex_shader;
+
+    FixedGeometryShaders fixed_geometry_shaders;
+
+    FragmentShaders fragment_shaders;
+    std::unordered_map<u64, OGLProgram> program_cache;
+    OGLPipeline pipeline;
+    ShaderDiskCache disk_cache;
+};
+
+ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
+                                           bool is_amd)
+    : impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
+
+ShaderProgramManager::~ShaderProgramManager() = default;
+
+bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
+                                                       Pica::Shader::ShaderSetup& setup) {
+    PicaVSConfig config{regs.vs, setup};
+    auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup);
+    if (handle == 0)
+        return false;
+    impl->current.vs = handle;
+    impl->current.vs_hash = config.Hash();
+
+    // Save VS to the disk cache if it's a new shader
+    if (result) {
+        auto& disk_cache = impl->disk_cache;
+        ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()};
+        program_code.insert(program_code.end(), setup.swizzle_data.begin(),
+                            setup.swizzle_data.end());
+        const u64 unique_identifier = GetUniqueIdentifier(regs, program_code);
+        const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs,
+                                     std::move(program_code)};
+        disk_cache.SaveRaw(raw);
+        disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul);
+    }
+    return true;
+}
+
+void ShaderProgramManager::UseTrivialVertexShader() {
+    impl->current.vs = impl->trivial_vertex_shader.Get();
+    impl->current.vs_hash = 0;
+}
+
+void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
+    PicaFixedGSConfig gs_config(regs);
+    auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
+    impl->current.gs = handle;
+    impl->current.gs_hash = gs_config.Hash();
+}
+
+void ShaderProgramManager::UseTrivialGeometryShader() {
+    impl->current.gs = 0;
+    impl->current.gs_hash = 0;
+}
+
+void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
+    PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
+    auto [handle, result] = impl->fragment_shaders.Get(config);
+    impl->current.fs = handle;
+    impl->current.fs_hash = config.Hash();
+    // Save FS to the disk cache if it's a new shader
+    if (result) {
+        auto& disk_cache = impl->disk_cache;
+        u64 unique_identifier = GetUniqueIdentifier(regs, {});
+        ShaderDiskCacheRaw raw{unique_identifier, ProgramType::FS, regs, {}};
+        disk_cache.SaveRaw(raw);
+        disk_cache.SaveDecompiled(unique_identifier, *result, false);
+    }
+}
+
+void ShaderProgramManager::ApplyTo(OpenGLState& state) {
+    if (impl->separable) {
+        if (impl->is_amd) {
+            // Without this resetting, AMD sometimes freezes when one stage is changed but not
+            // for the others. On the other hand, including this reset seems to introduce a
+            // memory leak in Intel Graphics.
+            glUseProgramStages(
+                impl->pipeline.handle,
+                GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
+        }
+
+        glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs);
+        glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs);
+        glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs);
+        state.draw.shader_program = 0;
+        state.draw.program_pipeline = impl->pipeline.handle;
+    } else {
+        const u64 unique_identifier = impl->current.GetConfigHash();
+        OGLProgram& cached_program = impl->program_cache[unique_identifier];
+        if (cached_program.handle == 0) {
+            cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs});
+            auto& disk_cache = impl->disk_cache;
+            disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle,
+                                      VideoCore::g_hw_shader_accurate_mul);
+
+            SetShaderUniformBlockBindings(cached_program.handle);
+            SetShaderSamplerBindings(cached_program.handle);
+        }
+        state.draw.shader_program = cached_program.handle;
+    }
+}
+
+void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    if (!GLAD_GL_ARB_get_program_binary && !GLES) {
+        LOG_ERROR(Render_OpenGL,
+                  "Cannot load disk cache as ARB_get_program_binary is not supported!");
+        return;
+    }
+
+    auto& disk_cache = impl->disk_cache;
+    const auto transferable = disk_cache.LoadTransferable();
+    if (!transferable) {
+        return;
+    }
+    const auto& raws = *transferable;
+
+    // Load uncompressed precompiled file for non-separable shaders.
+    // Precompiled file for separable shaders is compressed.
+    auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable);
+
+    if (stop_loading) {
+        return;
+    }
+
+    std::set<GLenum> supported_formats = GetSupportedFormats();
+
+    // Track if precompiled cache was altered during loading to know if we have to serialize the
+    // virtual precompiled cache file back to the hard drive
+    bool precompiled_cache_altered = false;
+
+    std::mutex mutex;
+    std::atomic_bool compilation_failed = false;
+    if (callback) {
+        callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
+    }
+    std::vector<std::size_t> load_raws_index;
+    // Loads both decompiled and precompiled shaders from the cache. If either one is missing for
+    // a shader, it is queued to be rebuilt from the raw program in the next phase.
+    const auto LoadPrecompiledShader = [&](std::size_t begin, std::size_t end,
+                                           const std::vector<ShaderDiskCacheRaw>& raw_cache,
+                                           const ShaderDecompiledMap& decompiled_map,
+                                           const ShaderDumpsMap& dump_map) {
+        for (std::size_t i = begin; i < end; ++i) {
+            if (stop_loading || compilation_failed) {
+                return;
+            }
+            const auto& raw{raw_cache[i]};
+            const u64 unique_identifier{raw.GetUniqueIdentifier()};
+
+            const u64 calculated_hash =
+                GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode());
+            if (unique_identifier != calculated_hash) {
+                LOG_ERROR(Render_OpenGL,
+                          "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing "
+                          "shader cache",
+                          raw.GetUniqueIdentifier(), calculated_hash);
+                disk_cache.InvalidateAll();
+                return;
+            }
+
+            const auto dump{dump_map.find(unique_identifier)};
+            const auto decomp{decompiled_map.find(unique_identifier)};
+
+            OGLProgram shader;
+
+            if (dump != dump_map.end() && decomp != decompiled_map.end()) {
+                // Only load the vertex shader if its sanitize_mul setting matches
+                if (raw.GetProgramType() == ProgramType::VS &&
+                    decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
+                    continue;
+                }
+
+                // If the shader is dumped, attempt to load it
+                shader =
+                    GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable);
+                if (shader.handle == 0) {
+                    // If any shader failed, stop trying to compile, delete the cache, and start
+                    // loading from raws
+                    compilation_failed = true;
+                    return;
+                }
+                // We have both the binary shader and the decompiled source, so inject it into
+                // the cache
+                if (raw.GetProgramType() == ProgramType::VS) {
+                    auto [conf, setup] = BuildVSConfigFromRaw(raw);
+                    std::scoped_lock lock(mutex);
+                    impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
+                                                             std::move(shader));
+                } else if (raw.GetProgramType() == ProgramType::FS) {
+                    PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
+                    std::scoped_lock lock(mutex);
+                    impl->fragment_shaders.Inject(conf, std::move(shader));
+                } else {
+                    // Unsupported shader type got stored somehow so nuke the cache
+                    LOG_CRITICAL(Frontend, "Failed to load raw ProgramType {}",
+                                 raw.GetProgramType());
+                    compilation_failed = true;
+                    return;
+                }
+            } else {
+                // Since precompiled didn't have the dump, we'll load them in the next phase
+                std::scoped_lock lock(mutex);
+                load_raws_index.push_back(i);
+            }
+            if (callback) {
+                callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size());
+            }
+        }
+    };
+
+    const auto LoadPrecompiledProgram = [&](const ShaderDecompiledMap& decompiled_map,
+                                            const ShaderDumpsMap& dump_map) {
+        std::size_t i{0};
+        for (const auto& dump : dump_map) {
+            if (stop_loading) {
+                break;
+            }
+            const u64 unique_identifier{dump.first};
+            const auto decomp{decompiled_map.find(unique_identifier)};
+
+            // Only load the program if its sanitize_mul setting matches
+            if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
+                continue;
+            }
+
+            // If the shader program is dumped, attempt to load it
+            OGLProgram shader =
+                GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable);
+            if (shader.handle != 0) {
+                SetShaderUniformBlockBindings(shader.handle);
+                SetShaderSamplerBindings(shader.handle);
+                impl->program_cache.emplace(unique_identifier, std::move(shader));
+            } else {
+                LOG_ERROR(Frontend, "Failed to link precompiled program!");
+                compilation_failed = true;
+                break;
+            }
+            if (callback) {
+                callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size());
+            }
+        }
+    };
+
+    if (impl->separable) {
+        LoadPrecompiledShader(0, raws.size(), raws, decompiled, dumps);
+    } else {
+        LoadPrecompiledProgram(decompiled, dumps);
+    }
+
+    bool load_all_raws = false;
+    if (compilation_failed) {
+        // Invalidate the precompiled cache if a dumped shader was rejected
+        impl->program_cache.clear();
+        disk_cache.InvalidatePrecompiled();
+        dumps.clear();
+        precompiled_cache_altered = true;
+        load_all_raws = true;
+    }
+    // TODO(SachinV): Skip loading raws until we implement a proper way to link non-separable
+    // shaders.
+    if (!impl->separable) {
+        return;
+    }
+
+    const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size();
+
+    if (callback) {
+        callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size);
+    }
+
+    compilation_failed = false;
+
+    std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
+    const auto LoadRawSeparable = [&](Frontend::GraphicsContext* context, std::size_t begin,
+                                      std::size_t end) {
+        Frontend::ScopeAcquireContext scope(*context);
+        for (std::size_t i = begin; i < end; ++i) {
+            if (stop_loading || compilation_failed) {
+                return;
+            }
+
+            const std::size_t raws_index = load_all_raws ? i : load_raws_index[i];
+            const auto& raw{raws[raws_index]};
+            const u64 unique_identifier{raw.GetUniqueIdentifier()};
+
+            bool sanitize_mul = false;
+            GLuint handle{0};
+            std::optional<ShaderDecompiler::ProgramResult> result;
+            // Otherwise decompile and build the shader at boot and save the result to the
+            // precompiled file
+            if (raw.GetProgramType() == ProgramType::VS) {
+                auto [conf, setup] = BuildVSConfigFromRaw(raw);
+                result = GenerateVertexShader(setup, conf, impl->separable);
+                OGLShaderStage stage{impl->separable};
+                stage.Create(result->code.c_str(), GL_VERTEX_SHADER);
+                handle = stage.GetHandle();
+                sanitize_mul = conf.state.sanitize_mul;
+                std::scoped_lock lock(mutex);
+                impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
+            } else if (raw.GetProgramType() == ProgramType::FS) {
+                PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
+                result = GenerateFragmentShader(conf, impl->separable);
+                OGLShaderStage stage{impl->separable};
+                stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);
+                handle = stage.GetHandle();
+                std::scoped_lock lock(mutex);
+                impl->fragment_shaders.Inject(conf, std::move(stage));
+            } else {
+                // Unsupported shader type got stored somehow so nuke the cache
+                LOG_ERROR(Frontend, "Failed to load raw ProgramType {}", raw.GetProgramType());
+                compilation_failed = true;
+                return;
+            }
+            if (handle == 0) {
+                LOG_ERROR(Frontend, "Compilation from raw failed {:x} {:x}",
+                          raw.GetProgramCode().at(0), raw.GetProgramCode().at(1));
+                compilation_failed = true;
+                return;
+            }
+
+            std::scoped_lock lock(mutex);
+            // If this is a new separable shader, add it to the precompiled cache
+            if (result) {
+                disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
+                disk_cache.SaveDump(unique_identifier, handle);
+                precompiled_cache_altered = true;
+            }
+
+            if (callback) {
+                callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size);
+            }
+        }
+    };
+
+    const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
+    const std::size_t bucket_size{load_raws_size / num_workers};
+    std::vector<std::unique_ptr<Frontend::GraphicsContext>> contexts(num_workers);
+    std::vector<std::thread> threads(num_workers);
+    for (std::size_t i = 0; i < num_workers; ++i) {
+        const bool is_last_worker = i + 1 == num_workers;
+        const std::size_t start{bucket_size * i};
+        const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size};
+
+        // On some platforms the shared context has to be created from the GUI thread
+        contexts[i] = emu_window.CreateSharedContext();
+        threads[i] = std::thread(LoadRawSeparable, contexts[i].get(), start, end);
+    }
+    for (auto& thread : threads) {
+        thread.join();
+    }
+
+    if (compilation_failed) {
+        disk_cache.InvalidateAll();
+    }
+
+    if (precompiled_cache_altered) {
+        disk_cache.SaveVirtualPrecompiledFile();
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_manager.h b/src/video_core/renderer_vulkan/vk_pipeline_manager.h
new file mode 100644
index 000000000..8b00b2c94
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline_manager.h
@@ -0,0 +1,131 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "video_core/rasterizer_interface.h"
+#include "video_core/regs_lighting.h"
+#include "video_core/renderer_vulkan/pica_to_vulkan.h"
+#include "video_core/renderer_vulkan/vk_shader_state.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+enum class UniformBindings : GLuint { Common, VS, GS };
+
+struct LightSrc {
+    alignas(16) glm::vec3 specular_0;
+    alignas(16) glm::vec3 specular_1;
+    alignas(16) glm::vec3 diffuse;
+    alignas(16) glm::vec3 ambient;
+    alignas(16) glm::vec3 position;
+    alignas(16) glm::vec3 spot_direction; // negated
+    float dist_atten_bias;
+    float dist_atten_scale;
+};
+
+/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
+// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+// Not following that rule will cause problems on some AMD drivers.
+struct UniformData {
+    int framebuffer_scale;
+    int alphatest_ref;
+    float depth_scale;
+    float depth_offset;
+    float shadow_bias_constant;
+    float shadow_bias_linear;
+    int scissor_x1;
+    int scissor_y1;
+    int scissor_x2;
+    int scissor_y2;
+    int fog_lut_offset;
+    int proctex_noise_lut_offset;
+    int proctex_color_map_offset;
+    int proctex_alpha_map_offset;
+    int proctex_lut_offset;
+    int proctex_diff_lut_offset;
+    float proctex_bias;
+    int shadow_texture_bias;
+    alignas(16) glm::ivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
+    alignas(16) glm::vec3 fog_color;
+    alignas(8) glm::vec2 proctex_noise_f;
+    alignas(8) glm::vec2 proctex_noise_a;
+    alignas(8) glm::vec2 proctex_noise_p;
+    alignas(16) glm::vec3 lighting_global_ambient;
+    LightSrc light_src[8];
+    alignas(16) glm::vec4 const_color[6]; // A vec4 color for each of the six tev stages
+    alignas(16) glm::vec4 tev_combiner_buffer_color;
+    alignas(16) glm::vec4 clip_coef;
+};
+
+static_assert(sizeof(UniformData) == 0x4F0,
+              "The size of the UniformData structure has changed, update the structure in the shader");
+static_assert(sizeof(UniformData) < 16384,
+              "UniformData structure must be less than 16kb as per the OpenGL spec");
+
+/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
+// NOTE: the same rule from UniformData also applies here.
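+// For instance, under these packing rules a vec3 member must start on a 16-byte
+// boundary, which is what the alignas specifiers model on the C++ side
+// (editorial sketch, hypothetical struct):
+//
+//     struct Example {
+//         float scalar;                 // offset 0
+//         alignas(16) glm::vec3 color;  // offset 16, matching the shader layout
+//     };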
+struct PicaUniformsData {
+    void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
+
+    struct BoolAligned {
+        alignas(16) int b;
+    };
+
+    std::array<BoolAligned, 16> bools;
+    alignas(16) std::array<GLuvec4, 4> i;
+    alignas(16) std::array<GLvec4, 96> f;
+};
+
+struct VSUniformData {
+    PicaUniformsData uniforms;
+};
+
+static_assert(sizeof(VSUniformData) == 1856,
+              "The size of the VSUniformData structure has changed, update the structure in the shader");
+static_assert(sizeof(VSUniformData) < 16384,
+              "VSUniformData structure must be less than 16kb as per the Vulkan spec");
+
+using Resource = std::variant;
+
+/// Includes all required information to build a Vulkan pipeline object
+class VKPipelineInfo : private NonCopyable {
+public:
+    VKPipelineInfo() = default;
+    ~VKPipelineInfo() = default;
+
+    /// Assign a shader module to a specific stage
+    void AddShaderModule(const vk::ShaderModule& module, vk::ShaderStageFlagBits stage);
+
+    /// Add a texture or a buffer to the target descriptor set
+    void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
+
+private:
+    using ResourceInfo = std::pair, vk::DescriptorSetLayoutBinding>;
+
+    std::unordered_map> descriptor_sets;
+    std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
+};
+
+/// A class that manages the storage and management of Vulkan pipeline objects.
+class PipelineManager {
+public:
+    PipelineManager(Frontend::EmuWindow& emu_window);
+    ~PipelineManager();
+
+    /// Retrieves the Vulkan pipeline that maps to the current PICA state.
+    /// If not present, it is compiled and cached
+    vk::Pipeline GetPipeline(const Pica::Regs& config, Pica::Shader::ShaderSetup& setup);
+
+private:
+    std::unordered_map<u64, VKPipeline> pipelines;
+    vk::UniquePipelineCache pipeline_cache;
+
+    Frontend::EmuWindow& emu_window;
+};
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..2f04412a4
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,2252 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
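+//
+// Editorial note: the body of this file is carried over nearly verbatim from the
+// OpenGL rasterizer (gl_rasterizer.cpp) as a scaffold for the Vulkan port; the
+// renderer_opengl includes and the OpenGL namespace below still reflect that.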
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/vector_math.h"
+#include "core/hw/gpu.h"
+#include "video_core/pica_state.h"
+#include "video_core/regs_framebuffer.h"
+#include "video_core/regs_rasterizer.h"
+#include "video_core/regs_texturing.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/renderer_opengl/gl_vars.h"
+#include "video_core/renderer_opengl/pica_to_gl.h"
+#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/video_core.h"
+
+namespace OpenGL {
+
+using PixelFormat = SurfaceParams::PixelFormat;
+using SurfaceType = SurfaceParams::SurfaceType;
+
+MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
+MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
+MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
+MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
+
+static bool IsVendorAmd() {
+    const std::string_view gpu_vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
+    return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
+}
+static bool IsVendorIntel() {
+    std::string gpu_vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
+    return gpu_vendor == "Intel Inc.";
+}
+
+RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window)
+    : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
+      uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
+      index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
+      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
+      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
+
+    allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
+                            GLAD_GL_ARB_framebuffer_no_attachments);
+    if (!allow_shadow) {
+        LOG_WARNING(Render_OpenGL,
+                    "Shadow might not be able to render because of unsupported OpenGL extensions.");
+    }
+
+    if (!GLAD_GL_ARB_copy_image && !GLES) {
+        LOG_WARNING(Render_OpenGL,
+                    "ARB_copy_image not supported. Some games might produce artifacts.");
+    }
+
+    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
+    state.clip_distance[0] = true;
+
+    // Create a 1x1 clear texture to use in the NULL case,
+    // instead of OpenGL's default of solid black
+    glGenTextures(1, &default_texture);
+    glBindTexture(GL_TEXTURE_2D, default_texture);
+    // For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
+    u8 framebuffer_data[4] = {0, 0, 0, 1};
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
+
+    // Create sampler objects
+    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
+        texture_samplers[i].Create();
+        state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
+    }
+
+    // Create cubemap texture and sampler objects
+    texture_cube_sampler.Create();
+    state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle;
+
+    // Generate VAO
+    sw_vao.Create();
+    hw_vao.Create();
+
+    uniform_block_data.dirty = true;
+
+    uniform_block_data.lighting_lut_dirty.fill(true);
+    uniform_block_data.lighting_lut_dirty_any = true;
+
+    uniform_block_data.fog_lut_dirty = true;
+
+    uniform_block_data.proctex_noise_lut_dirty = true;
+    uniform_block_data.proctex_color_map_dirty = true;
+    uniform_block_data.proctex_alpha_map_dirty = true;
+    uniform_block_data.proctex_lut_dirty = true;
+    uniform_block_data.proctex_diff_lut_dirty = true;
+
+    glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
+    uniform_size_aligned_vs =
+        Common::AlignUp<std::size_t>(sizeof(VSUniformData), uniform_buffer_alignment);
+    uniform_size_aligned_fs =
+        Common::AlignUp<std::size_t>(sizeof(UniformData), uniform_buffer_alignment);
+
+    // Set vertex attributes for software shader path
+    state.draw.vertex_array = sw_vao.handle;
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, position));
+    glEnableVertexAttribArray(ATTRIBUTE_POSITION);
+
+    glVertexAttribPointer(ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, color));
+    glEnableVertexAttribArray(ATTRIBUTE_COLOR);
+
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord0));
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord1));
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord2));
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0);
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD1);
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD2);
+
+    glVertexAttribPointer(ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
+    glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0_W);
+
+    glVertexAttribPointer(ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, normquat));
+    glEnableVertexAttribArray(ATTRIBUTE_NORMQUAT);
+
+    glVertexAttribPointer(ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
+                          (GLvoid*)offsetof(HardwareVertex, view));
+    glEnableVertexAttribArray(ATTRIBUTE_VIEW);
+
+    // Create render framebuffer
+    framebuffer.Create();
+
+    // Allocate and bind texture buffer lut textures
+    texture_buffer_lut_lf.Create();
+    texture_buffer_lut_rg.Create();
texture_buffer_lut_rgba.Create(); + state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle; + state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; + state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; + state.Apply(); + glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle()); + glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); + glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); + + // Bind index buffer for hardware shader path + state.draw.vertex_array = hw_vao.handle; + state.Apply(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); + +#ifdef __APPLE__ + if (IsVendorIntel()) { + shader_program_manager = std::make_unique( + emu_window, + VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false, + is_amd); + } else { + shader_program_manager = std::make_unique( + emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); + } +#else + shader_program_manager = std::make_unique( + emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); +#endif + + glEnable(GL_BLEND); + + SyncEntireState(); +} + +RasterizerOpenGL::~RasterizerOpenGL() = default; + +void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + shader_program_manager->LoadDiskCache(stop_loading, callback); +} + +void RasterizerOpenGL::SyncEntireState() { + // Sync fixed function OpenGL state + SyncClipEnabled(); + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); + SyncLogicOp(); + SyncStencilTest(); + SyncDepthTest(); + SyncColorWriteMask(); + SyncStencilWriteMask(); + SyncDepthWriteMask(); + + // Sync uniforms + SyncClipCoef(); + SyncDepthScale(); + SyncDepthOffset(); + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages(); + for (std::size_t index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (unsigned light_index = 0; light_index < 8; light_index++) { + SyncLightSpecular0(light_index); + SyncLightSpecular1(light_index); + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + SyncLightDistanceAttenuationBias(light_index); + SyncLightDistanceAttenuationScale(light_index); + } + + SyncFogColor(); + SyncProcTexNoise(); + SyncProcTexBias(); + SyncShadowBias(); + SyncShadowTextureBias(); +} + +/** + * This is a helper function to resolve an issue when interpolating opposite quaternions. See below + * for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(Q1, -Q2) positive. + * + * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is + * correct for most cases but can still rotate around the long way sometimes. 
+
+static constexpr std::array<GLenum, 4> vs_attrib_types{
+    GL_BYTE,          // VertexAttributeFormat::BYTE
+    GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
+    GL_SHORT,         // VertexAttributeFormat::SHORT
+    GL_FLOAT          // VertexAttributeFormat::FLOAT
+};
+
+struct VertexArrayInfo {
+    u32 vs_input_index_min;
+    u32 vs_input_index_max;
+    u32 vs_input_size;
+};
+
+RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
+
+    u32 vertex_min;
+    u32 vertex_max;
+    if (is_indexed) {
+        const auto& index_info = regs.pipeline.index_array;
+        const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
+        const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
+        const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+        const bool index_u16 = index_info.format != 0;
+
+        vertex_min = 0xFFFF;
+        vertex_max = 0;
+        const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
+        res_cache.FlushRegion(address, size, nullptr);
+        for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
+            const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
+            vertex_min = std::min(vertex_min, vertex);
+            vertex_max = std::max(vertex_max, vertex);
+        }
+    } else {
+        vertex_min = regs.pipeline.vertex_offset;
+        vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
+    }
+
+    const u32 vertex_num = vertex_max - vertex_min + 1;
+    u32 vs_input_size = 0;
+    for (const auto& loader : vertex_attributes.attribute_loaders) {
+        if (loader.component_count != 0) {
+            vs_input_size += loader.byte_count * vertex_num;
+        }
+    }
+
+    return {vertex_min, vertex_max, vs_input_size};
+}
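+
+// Worked example for the scan above: an indexed draw with num_vertices = 3 whose index
+// buffer holds {7, 5, 9} produces vertex_min = 5 and vertex_max = 9, so five vertices
+// (5 through 9) are streamed even though only three are referenced, and vs_input_size
+// sums loader.byte_count * 5 over every active attribute loader.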
+
+void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
+                                        GLuint vs_input_index_min, GLuint vs_input_index_max) {
+    MICROPROFILE_SCOPE(OpenGL_VAO);
+    const auto& regs = Pica::g_state.regs;
+    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
+    PAddr base_address = vertex_attributes.GetPhysicalBaseAddress();
+
+    state.draw.vertex_array = hw_vao.handle;
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    std::array<bool, 16> enable_attributes{};
+
+    for (const auto& loader : vertex_attributes.attribute_loaders) {
+        if (loader.component_count == 0 || loader.byte_count == 0) {
+            continue;
+        }
+
+        u32 offset = 0;
+        for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) {
+            u32 attribute_index = loader.GetComponent(comp);
+            if (attribute_index < 12) {
+                if (vertex_attributes.GetNumElements(attribute_index) != 0) {
+                    offset = Common::AlignUp(
+                        offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
+
+                    u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
+                    GLint size = vertex_attributes.GetNumElements(attribute_index);
+                    GLenum type = vs_attrib_types[static_cast<u32>(
+                        vertex_attributes.GetFormat(attribute_index))];
+                    GLsizei stride = loader.byte_count;
+                    glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride,
+                                          reinterpret_cast<GLvoid*>(buffer_offset + offset));
+                    enable_attributes[input_reg] = true;
+
+                    offset += vertex_attributes.GetStride(attribute_index);
+                }
+            } else {
+                // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings,
+                // respectively
+                offset = Common::AlignUp(offset, 4);
+                offset += (attribute_index - 11) * 4;
+            }
+        }
+
+        PAddr data_addr =
+            base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
+
+        u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
+        u32 data_size = loader.byte_count * vertex_num;
+
+        res_cache.FlushRegion(data_addr, data_size, nullptr);
+        std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
+
+        array_ptr += data_size;
+        buffer_offset += data_size;
+    }
+
+    for (std::size_t i = 0; i < enable_attributes.size(); ++i) {
+        if (enable_attributes[i] != hw_vao_enabled_attributes[i]) {
+            if (enable_attributes[i]) {
+                glEnableVertexAttribArray(static_cast<GLuint>(i));
+            } else {
+                glDisableVertexAttribArray(static_cast<GLuint>(i));
+            }
+            hw_vao_enabled_attributes[i] = enable_attributes[i];
+        }
+
+        if (vertex_attributes.IsDefaultAttribute(i)) {
+            const u32 reg = regs.vs.GetRegisterForAttribute(i);
+            if (!enable_attributes[reg]) {
+                const auto& attr = Pica::g_state.input_default_attributes.attr[i];
+                glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
+                                 attr.w.ToFloat32());
+            }
+        }
+    }
+}
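+
+// Note on the padding ids handled above: a loader component with attribute_index 13
+// first aligns the interleaved offset to 4 and then advances it by (13 - 11) * 4 = 8
+// bytes, matching the PICA convention that ids 12-15 stand for 4/8/12/16 bytes of
+// padding between real attributes.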
+
+bool RasterizerOpenGL::SetupVertexShader() {
+    MICROPROFILE_SCOPE(OpenGL_VS);
+    return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
+                                                               Pica::g_state.vs);
+}
+
+bool RasterizerOpenGL::SetupGeometryShader() {
+    MICROPROFILE_SCOPE(OpenGL_GS);
+    const auto& regs = Pica::g_state.regs;
+
+    if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
+        LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
+        return false;
+    }
+
+    shader_program_manager->UseFixedGeometryShader(regs);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
+        if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) {
+            return false;
+        }
+        if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) {
+            return false;
+        }
+    }
+
+    if (!SetupVertexShader())
+        return false;
+
+    if (!SetupGeometryShader())
+        return false;
+
+    return Draw(true, is_indexed);
+}
+
+static GLenum GetCurrentPrimitiveMode() {
+    const auto& regs = Pica::g_state.regs;
+    switch (regs.pipeline.triangle_topology) {
+    case Pica::PipelineRegs::TriangleTopology::Shader:
+    case Pica::PipelineRegs::TriangleTopology::List:
+        return GL_TRIANGLES;
+    case Pica::PipelineRegs::TriangleTopology::Fan:
+        return GL_TRIANGLE_FAN;
+    case Pica::PipelineRegs::TriangleTopology::Strip:
+        return GL_TRIANGLE_STRIP;
+    default:
+        UNREACHABLE();
+    }
+}
+
+bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
+    const auto& regs = Pica::g_state.regs;
+    GLenum primitive_mode = GetCurrentPrimitiveMode();
+
+    auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
+
+    if (vs_input_size > VERTEX_BUFFER_SIZE) {
+        LOG_WARNING(Render_OpenGL, "Too large vertex input size {}", vs_input_size);
+        return false;
+    }
+
+    state.draw.vertex_buffer = vertex_buffer.GetHandle();
+    state.Apply();
+
+    u8* buffer_ptr;
+    GLintptr buffer_offset;
+    std::tie(buffer_ptr, buffer_offset, std::ignore) = vertex_buffer.Map(vs_input_size, 4);
+    SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
+    vertex_buffer.Unmap(vs_input_size);
+
+    shader_program_manager->ApplyTo(state);
+    state.Apply();
+
+    if (is_indexed) {
+        bool index_u16 = regs.pipeline.index_array.format != 0;
+        std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
+
+        if (index_buffer_size > INDEX_BUFFER_SIZE) {
+            LOG_WARNING(Render_OpenGL, "Too large index input size {}", index_buffer_size);
+            return false;
+        }
+
+        const u8* index_data = VideoCore::g_memory->GetPhysicalPointer(
+            regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
+            regs.pipeline.index_array.offset);
+        std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4);
+        std::memcpy(buffer_ptr, index_data, index_buffer_size);
+        index_buffer.Unmap(index_buffer_size);
+
+        glDrawRangeElementsBaseVertex(
+            primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices,
+            index_u16 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
+            reinterpret_cast<const void*>(buffer_offset), -static_cast<GLint>(vs_input_index_min));
+    } else {
+        glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices);
+    }
+    return true;
+}
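+
+// The negative basevertex passed above rebases indices: only vertices in
+// [vs_input_index_min, vs_input_index_max] were streamed, so index i must address slot
+// i - vs_input_index_min of the copied data. E.g. with vs_input_index_min = 5, index 7
+// resolves to the third streamed vertex.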
+
+void RasterizerOpenGL::DrawTriangles() {
+    if (vertex_batch.empty())
+        return;
+    Draw(false, false);
+}
+
+bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
+    const auto& regs = Pica::g_state.regs;
+
+    bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
+                            Pica::FramebufferRegs::FragmentOperationMode::Shadow;
+
+    const bool has_stencil =
+        regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
+
+    const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE ||
+                                state.color_mask.green_enabled == GL_TRUE ||
+                                state.color_mask.blue_enabled == GL_TRUE ||
+                                state.color_mask.alpha_enabled == GL_TRUE;
+
+    const bool write_depth_fb =
+        (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
+        (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0);
+
+    const bool using_color_fb =
+        regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
+    const bool using_depth_fb =
+        !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
+        (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
+         (has_stencil && state.stencil.test_enabled));
+
+    Common::Rectangle<s32> viewport_rect_unscaled{
+        // These registers hold half-width and half-height, so must be multiplied by 2
+        regs.rasterizer.viewport_corner.x,  // left
+        regs.rasterizer.viewport_corner.y + // top
+            static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() *
+                             2),
+        regs.rasterizer.viewport_corner.x + // right
+            static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() *
+                             2),
+        regs.rasterizer.viewport_corner.y // bottom
+    };
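+    // E.g. the usual 400x240 3DS framebuffer is programmed as half-sizes 200x120 in
+    // viewport_size_x/y, which is why the decoded float24 values are doubled above.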
+
+    Surface color_surface;
+    Surface depth_surface;
+    Common::Rectangle<u32> surfaces_rect;
+    std::tie(color_surface, depth_surface, surfaces_rect) =
+        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
+
+    const u16 res_scale = color_surface != nullptr
+                              ? color_surface->res_scale
+                              : (depth_surface == nullptr ? 1u : depth_surface->res_scale);
+
+    Common::Rectangle<u32> draw_rect{
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+                                             viewport_rect_unscaled.left * res_scale,
+                                         surfaces_rect.left, surfaces_rect.right)), // Left
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+                                             viewport_rect_unscaled.top * res_scale,
+                                         surfaces_rect.bottom, surfaces_rect.top)), // Top
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) +
+                                             viewport_rect_unscaled.right * res_scale,
+                                         surfaces_rect.left, surfaces_rect.right)), // Right
+        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
+                                             viewport_rect_unscaled.bottom * res_scale,
+                                         surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+
+    // Bind the framebuffer surfaces
+    state.draw.draw_framebuffer = framebuffer.handle;
+    state.Apply();
+
+    if (shadow_rendering) {
+        if (!allow_shadow || color_surface == nullptr) {
+            return true;
+        }
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH,
+                                color_surface->width * color_surface->res_scale);
+        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT,
+                                color_surface->height * color_surface->res_scale);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+        state.image_shadow_buffer = color_surface->texture.handle;
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                               color_surface != nullptr ? color_surface->texture.handle : 0, 0);
+        if (depth_surface != nullptr) {
+            if (has_stencil) {
+                // attach both depth and stencil
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                       GL_TEXTURE_2D, depth_surface->texture.handle, 0);
+            } else {
+                // attach depth
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                       depth_surface->texture.handle, 0);
+                // clear stencil attachment
+                glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                                       0);
+            }
+        } else {
+            // clear both depth and stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        }
+    }
+
+    // Sync the viewport
+    state.viewport.x =
+        static_cast<GLint>(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale;
+    state.viewport.y =
+        static_cast<GLint>(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale;
+    state.viewport.width = static_cast<GLsizei>(viewport_rect_unscaled.GetWidth() * res_scale);
+    state.viewport.height = static_cast<GLsizei>(viewport_rect_unscaled.GetHeight() * res_scale);
+
+    if (uniform_block_data.data.framebuffer_scale != res_scale) {
+        uniform_block_data.data.framebuffer_scale = res_scale;
+        uniform_block_data.dirty = true;
+    }
+
+    // Scissor checks are window-, not viewport-relative, which means that if the cached texture
+    // sub-rect changes, the scissor bounds also need to be updated.
+    GLint scissor_x1 =
+        static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
+    GLint scissor_y1 =
+        static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
+    // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
+    // scaling or doing multisampling.
+    GLint scissor_x2 =
+        static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
+    GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
+                                          (regs.rasterizer.scissor_test.y2 + 1) * res_scale);
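+    // E.g. with res_scale = 2 and scissor_test.x2 = 319, the right edge becomes
+    // (319 + 1) * 2 = 640 upscaled pixels from surfaces_rect.left, so the whole last
+    // native pixel stays covered after scaling.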
+
+    if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
+        uniform_block_data.data.scissor_x2 != scissor_x2 ||
+        uniform_block_data.data.scissor_y1 != scissor_y1 ||
+        uniform_block_data.data.scissor_y2 != scissor_y2) {
+
+        uniform_block_data.data.scissor_x1 = scissor_x1;
+        uniform_block_data.data.scissor_x2 = scissor_x2;
+        uniform_block_data.data.scissor_y1 = scissor_y1;
+        uniform_block_data.data.scissor_y2 = scissor_y2;
+        uniform_block_data.dirty = true;
+    }
+
+    bool need_duplicate_texture = false;
+    auto CheckBarrier = [&need_duplicate_texture, &color_surface](GLuint handle) {
+        if (color_surface && color_surface->texture.handle == handle) {
+            need_duplicate_texture = true;
+        }
+    };
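+    // CheckBarrier is called with every texture handle bound below; if any of them
+    // aliases the current color target, the same surface would be sampled and rendered
+    // to in one draw, so need_duplicate_texture requests the copy made further down.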
+
+    // Sync and bind the texture surfaces
+    const auto pica_textures = regs.texturing.GetTextures();
+    for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+        const auto& texture = pica_textures[texture_index];
+
+        if (texture.enabled) {
+            if (texture_index == 0) {
+                using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
+                switch (texture.config.type.Value()) {
+                case TextureType::Shadow2D: {
+                    if (!allow_shadow)
+                        continue;
+
+                    Surface surface = res_cache.GetTextureSurface(texture);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_px = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+                    continue;
+                }
+                case TextureType::ShadowCube: {
+                    if (!allow_shadow)
+                        continue;
+                    Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(
+                        texture.config, texture.format);
+                    Surface surface;
+
+                    using CubeFace = Pica::TexturingRegs::CubeFace;
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_px = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_px = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_nx = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_nx = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_py = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_py = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_ny = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_ny = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_pz = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_pz = 0;
+                    }
+
+                    info.physical_address =
+                        regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ);
+                    surface = res_cache.GetTextureSurface(info);
+                    if (surface != nullptr) {
+                        CheckBarrier(state.image_shadow_texture_nz = surface->texture.handle);
+                    } else {
+                        state.image_shadow_texture_nz = 0;
+                    }
+
+                    continue;
+                }
+                case TextureType::TextureCube:
+                    using CubeFace = Pica::TexturingRegs::CubeFace;
+                    TextureCubeConfig config;
+                    config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX);
+                    config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX);
+                    config.py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY);
+                    config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY);
+                    config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ);
+                    config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ);
+                    config.width = texture.config.width;
+                    config.format = texture.format;
+                    state.texture_cube_unit.texture_cube =
+                        res_cache.GetTextureCube(config).texture.handle;
+
+                    texture_cube_sampler.SyncWithConfig(texture.config);
+                    state.texture_units[texture_index].texture_2d = 0;
+                    continue; // Texture unit 0 setup finished. Continue to next unit
+                }
+                state.texture_cube_unit.texture_cube = 0;
+            }
+
+            texture_samplers[texture_index].SyncWithConfig(texture.config);
+            Surface surface = res_cache.GetTextureSurface(texture);
+            if (surface != nullptr) {
+                CheckBarrier(state.texture_units[texture_index].texture_2d =
+                                 surface->texture.handle);
+            } else {
+                // Can occur when texture addr is null or its memory is unmapped/invalid
+                // HACK: In this case, the correct behaviour for the PICA is to use the last
+                // rendered colour. But because this would be impractical to implement, the
+                // next best alternative is to use a clear texture, essentially skipping
+                // the geometry in question.
+                // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
+                // on the male character's face, which in the OpenGL default appear black.
+                state.texture_units[texture_index].texture_2d = default_texture;
+            }
+        } else {
+            state.texture_units[texture_index].texture_2d = 0;
+        }
+    }
+
+    OGLTexture temp_tex;
+    if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) {
+        // The game is trying to use a surface as a texture and framebuffer at the same time
+        // which causes unpredictable behavior on the host.
+        // Making a copy to sample from eliminates this issue and seems to be fairly cheap.
+        temp_tex.Create();
+        glBindTexture(GL_TEXTURE_2D, temp_tex.handle);
+        auto [internal_format, format, type] = GetFormatTuple(color_surface->pixel_format);
+        OGLTexture::Allocate(GL_TEXTURE_2D, color_surface->max_level + 1, internal_format, format,
+                             type, color_surface->GetScaledWidth(),
+                             color_surface->GetScaledHeight());
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        glBindTexture(GL_TEXTURE_2D, state.texture_units[0].texture_2d);
+
+        for (std::size_t level{0}; level <= color_surface->max_level; ++level) {
+            glCopyImageSubData(color_surface->texture.handle, GL_TEXTURE_2D, level, 0, 0, 0,
+                               temp_tex.handle, GL_TEXTURE_2D, level, 0, 0, 0,
+                               color_surface->GetScaledWidth() >> level,
+                               color_surface->GetScaledHeight() >> level, 1);
+        }
+
+        for (auto& unit : state.texture_units) {
+            if (unit.texture_2d == color_surface->texture.handle) {
+                unit.texture_2d = temp_tex.handle;
+            }
+        }
+        for (auto shadow_unit : {&state.image_shadow_texture_nx, &state.image_shadow_texture_ny,
+                                 &state.image_shadow_texture_nz, &state.image_shadow_texture_px,
+                                 &state.image_shadow_texture_py, &state.image_shadow_texture_pz}) {
+            if (*shadow_unit == color_surface->texture.handle) {
+                *shadow_unit = temp_tex.handle;
+            }
+        }
+    }
+
+    // Sync and bind the shader
+    if (shader_dirty) {
+        SetShader();
+        shader_dirty = false;
+    }
+
+    // Sync the LUTs within the texture buffer
+    SyncAndUploadLUTs();
+    SyncAndUploadLUTsLF();
+
+    // Sync the uniform data
+    UploadUniforms(accelerate);
+
+    // Viewport can have negative offsets or larger
+    // dimensions than our framebuffer sub-rect.
+    // Enable scissor test to prevent drawing
+    // outside of the framebuffer region
+    state.scissor.enabled = true;
+    state.scissor.x = draw_rect.left;
+    state.scissor.y = draw_rect.bottom;
+    state.scissor.width = draw_rect.GetWidth();
+    state.scissor.height = draw_rect.GetHeight();
+    state.Apply();
+
+    // Draw the vertex batch
+    bool succeeded = true;
+    if (accelerate) {
+        succeeded = AccelerateDrawBatchInternal(is_indexed);
+    } else {
+        state.draw.vertex_array = sw_vao.handle;
+        state.draw.vertex_buffer = vertex_buffer.GetHandle();
+        shader_program_manager->UseTrivialVertexShader();
+        shader_program_manager->UseTrivialGeometryShader();
+        shader_program_manager->ApplyTo(state);
+        state.Apply();
+
+        std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
+        for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size();
+             base_vertex += max_vertices) {
+            const std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex);
+            const std::size_t vertex_size = vertices * sizeof(HardwareVertex);
+            u8* vbo;
+            GLintptr offset;
+            std::tie(vbo, offset, std::ignore) =
+                vertex_buffer.Map(vertex_size, sizeof(HardwareVertex));
+            std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size);
+            vertex_buffer.Unmap(vertex_size);
+            glDrawArrays(GL_TRIANGLES, static_cast<GLint>(offset / sizeof(HardwareVertex)),
+                         static_cast<GLsizei>(vertices));
+        }
+    }
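+    // max_vertices above is the largest multiple of 3 that fits the staging buffer, so
+    // a triangle is never split across two Map()/Unmap() cycles. With illustrative
+    // sizes, a 4608-byte buffer of 64-byte vertices gives 3 * (4608 / 192) = 72
+    // vertices per batch.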
+
+    vertex_batch.clear();
+
+    // Reset textures in rasterizer state context because the rasterizer cache might delete them
+    for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
+        state.texture_units[texture_index].texture_2d = 0;
+    }
+    state.texture_cube_unit.texture_cube = 0;
+    if (allow_shadow) {
+        state.image_shadow_texture_px = 0;
+        state.image_shadow_texture_nx = 0;
+        state.image_shadow_texture_py = 0;
+        state.image_shadow_texture_ny = 0;
+        state.image_shadow_texture_pz = 0;
+        state.image_shadow_texture_nz = 0;
+        state.image_shadow_buffer = 0;
+    }
+    state.Apply();
+
+    if (shadow_rendering) {
+        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+                        GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+    }
+
+    // Mark framebuffer surfaces as dirty
+    Common::Rectangle<u32> draw_rect_unscaled{draw_rect.left / res_scale, draw_rect.top / res_scale,
+                                              draw_rect.right / res_scale,
+                                              draw_rect.bottom / res_scale};
+
+    if (color_surface != nullptr && write_color_fb) {
+        auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
+        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+                                   color_surface);
+    }
+    if (depth_surface != nullptr && write_depth_fb) {
+        auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
+        res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
+                                   depth_surface);
+    }
+
+    return succeeded;
+}
+
+void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
+    const auto& regs = Pica::g_state.regs;
+
+    switch (id) {
+    // Culling
+    case PICA_REG_INDEX(rasterizer.cull_mode):
+        SyncCullMode();
+        break;
+
+    // Clipping plane
+    case PICA_REG_INDEX(rasterizer.clip_enable):
+        SyncClipEnabled();
+        break;
+
+    case PICA_REG_INDEX(rasterizer.clip_coef[0]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[1]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[2]):
+    case PICA_REG_INDEX(rasterizer.clip_coef[3]):
+        SyncClipCoef();
+        break;
+
+    // Depth modifiers
+    case PICA_REG_INDEX(rasterizer.viewport_depth_range):
+        SyncDepthScale();
+        break;
+    case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane):
+        SyncDepthOffset();
+        break;
+
+    // Depth buffering
+    case PICA_REG_INDEX(rasterizer.depthmap_enable):
+        shader_dirty = true;
+        break;
+
+    // Blending
+    case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
+        SyncBlendEnabled();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
+        SyncBlendFuncs();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.blend_const):
+        SyncBlendColor();
+        break;
+
+    // Shadow texture
+    case PICA_REG_INDEX(texturing.shadow):
+        SyncShadowTextureBias();
+        break;
+
+    // Fog state
+    case PICA_REG_INDEX(texturing.fog_color):
+        SyncFogColor();
+        break;
+    case PICA_REG_INDEX(texturing.fog_lut_data[0]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[1]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[2]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[3]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[4]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[5]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[6]):
+    case PICA_REG_INDEX(texturing.fog_lut_data[7]):
+        uniform_block_data.fog_lut_dirty = true;
+        break;
+
+    // ProcTex state
+    case PICA_REG_INDEX(texturing.proctex):
+    case PICA_REG_INDEX(texturing.proctex_lut):
+    case PICA_REG_INDEX(texturing.proctex_lut_offset):
+        SyncProcTexBias();
+        shader_dirty = true;
+        break;
+
+    case PICA_REG_INDEX(texturing.proctex_noise_u):
+    case PICA_REG_INDEX(texturing.proctex_noise_v):
+    case PICA_REG_INDEX(texturing.proctex_noise_frequency):
+        SyncProcTexNoise();
+        break;
+
+    case PICA_REG_INDEX(texturing.proctex_lut_data[0]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[1]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[2]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[3]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[4]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[5]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[6]):
+    case PICA_REG_INDEX(texturing.proctex_lut_data[7]):
+        using Pica::TexturingRegs;
+        switch (regs.texturing.proctex_lut_config.ref_table.Value()) {
+        case TexturingRegs::ProcTexLutTable::Noise:
+            uniform_block_data.proctex_noise_lut_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::ColorMap:
+            uniform_block_data.proctex_color_map_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::AlphaMap:
+            uniform_block_data.proctex_alpha_map_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::Color:
+            uniform_block_data.proctex_lut_dirty = true;
+            break;
+        case TexturingRegs::ProcTexLutTable::ColorDiff:
+            uniform_block_data.proctex_diff_lut_dirty = true;
+            break;
+        }
+        break;
+
+    // Alpha test
+    case PICA_REG_INDEX(framebuffer.output_merger.alpha_test):
+        SyncAlphaTest();
+        shader_dirty = true;
+        break;
+
+    // Sync GL stencil test + stencil write mask
+    // (Pica stencil test function register also contains a stencil write mask)
+    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func):
+        SyncStencilTest();
+        SyncStencilWriteMask();
+        break;
+    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op):
+    case PICA_REG_INDEX(framebuffer.framebuffer.depth_format):
+        SyncStencilTest();
+        break;
+
+    // Sync GL depth test + depth and color write mask
+    // (Pica depth test function register also contains a depth and color write mask)
+    case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable):
+        SyncDepthTest();
+        SyncDepthWriteMask();
+        SyncColorWriteMask();
+        break;
+
+    // Sync GL depth and stencil write mask
+    // (This is a dedicated combined depth / stencil write-enable register)
+    case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write):
+        SyncDepthWriteMask();
+        SyncStencilWriteMask();
+        break;
+
+    // Sync GL color write mask
+    // (This is a dedicated color write-enable register)
+    case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write):
+        SyncColorWriteMask();
+        break;
+
+    case PICA_REG_INDEX(framebuffer.shadow):
+        SyncShadowBias();
+        break;
+
+    // Scissor test
+    case PICA_REG_INDEX(rasterizer.scissor_test.mode):
+        shader_dirty = true;
+        break;
+
+    // Logic op
+    case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
+        SyncLogicOp();
+        break;
+
+    case PICA_REG_INDEX(texturing.main_config):
+        shader_dirty = true;
+        break;
+
+    // Texture 0 type
+    case PICA_REG_INDEX(texturing.texture0.type):
+        shader_dirty = true;
+        break;
+
+    // TEV stages
+    // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input)
+    case PICA_REG_INDEX(texturing.tev_stage0.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage0.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage1.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage2.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage3.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage4.color_scale):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_source1):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_op):
+    case PICA_REG_INDEX(texturing.tev_stage5.color_scale):
+    case PICA_REG_INDEX(texturing.tev_combiner_buffer_input):
+        shader_dirty = true;
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage0.const_r):
+        SyncTevConstColor(0, regs.texturing.tev_stage0);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage1.const_r):
+        SyncTevConstColor(1, regs.texturing.tev_stage1);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage2.const_r):
+        SyncTevConstColor(2, regs.texturing.tev_stage2);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage3.const_r):
+        SyncTevConstColor(3, regs.texturing.tev_stage3);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage4.const_r):
+        SyncTevConstColor(4, regs.texturing.tev_stage4);
+        break;
+    case PICA_REG_INDEX(texturing.tev_stage5.const_r):
+        SyncTevConstColor(5, regs.texturing.tev_stage5);
+        break;
+
+    // TEV combiner buffer color
+    case PICA_REG_INDEX(texturing.tev_combiner_buffer_color):
+        SyncCombinerColor();
+        break;
+
+    // Fragment lighting switches
+    case PICA_REG_INDEX(lighting.disable):
+    case PICA_REG_INDEX(lighting.max_light_index):
+    case PICA_REG_INDEX(lighting.config0):
+    case PICA_REG_INDEX(lighting.config1):
+    case PICA_REG_INDEX(lighting.abs_lut_input):
+    case PICA_REG_INDEX(lighting.lut_input):
+    case PICA_REG_INDEX(lighting.lut_scale):
+    case PICA_REG_INDEX(lighting.light_enable):
+        break;
+
+    // Fragment lighting specular 0 color
+    case PICA_REG_INDEX(lighting.light[0].specular_0):
+        SyncLightSpecular0(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].specular_0):
+        SyncLightSpecular0(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].specular_0):
+        SyncLightSpecular0(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].specular_0):
+        SyncLightSpecular0(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].specular_0):
+        SyncLightSpecular0(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].specular_0):
+        SyncLightSpecular0(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].specular_0):
+        SyncLightSpecular0(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].specular_0):
+        SyncLightSpecular0(7);
+        break;
+
+    // Fragment lighting specular 1 color
+    case PICA_REG_INDEX(lighting.light[0].specular_1):
+        SyncLightSpecular1(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].specular_1):
+        SyncLightSpecular1(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].specular_1):
+        SyncLightSpecular1(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].specular_1):
+        SyncLightSpecular1(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].specular_1):
+        SyncLightSpecular1(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].specular_1):
+        SyncLightSpecular1(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].specular_1):
+        SyncLightSpecular1(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].specular_1):
+        SyncLightSpecular1(7);
+        break;
+
+    // Fragment lighting diffuse color
+    case PICA_REG_INDEX(lighting.light[0].diffuse):
+        SyncLightDiffuse(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].diffuse):
+        SyncLightDiffuse(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].diffuse):
+        SyncLightDiffuse(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].diffuse):
+        SyncLightDiffuse(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].diffuse):
+        SyncLightDiffuse(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].diffuse):
+        SyncLightDiffuse(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].diffuse):
+        SyncLightDiffuse(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].diffuse):
+        SyncLightDiffuse(7);
+        break;
+
+    // Fragment lighting ambient color
+    case PICA_REG_INDEX(lighting.light[0].ambient):
+        SyncLightAmbient(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].ambient):
+        SyncLightAmbient(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].ambient):
+        SyncLightAmbient(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].ambient):
+        SyncLightAmbient(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].ambient):
+        SyncLightAmbient(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].ambient):
+        SyncLightAmbient(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].ambient):
+        SyncLightAmbient(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].ambient):
+        SyncLightAmbient(7);
+        break;
+
+    // Fragment lighting position
+    case PICA_REG_INDEX(lighting.light[0].x):
+    case PICA_REG_INDEX(lighting.light[0].z):
+        SyncLightPosition(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].x):
+    case PICA_REG_INDEX(lighting.light[1].z):
+        SyncLightPosition(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].x):
+    case PICA_REG_INDEX(lighting.light[2].z):
+        SyncLightPosition(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].x):
+    case PICA_REG_INDEX(lighting.light[3].z):
+        SyncLightPosition(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].x):
+    case PICA_REG_INDEX(lighting.light[4].z):
+        SyncLightPosition(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].x):
+    case PICA_REG_INDEX(lighting.light[5].z):
+        SyncLightPosition(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].x):
+    case PICA_REG_INDEX(lighting.light[6].z):
+        SyncLightPosition(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].x):
+    case PICA_REG_INDEX(lighting.light[7].z):
+        SyncLightPosition(7);
+        break;
+
+    // Fragment spot lighting direction
+    case PICA_REG_INDEX(lighting.light[0].spot_x):
+    case PICA_REG_INDEX(lighting.light[0].spot_z):
+        SyncLightSpotDirection(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].spot_x):
+    case PICA_REG_INDEX(lighting.light[1].spot_z):
+        SyncLightSpotDirection(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].spot_x):
+    case PICA_REG_INDEX(lighting.light[2].spot_z):
+        SyncLightSpotDirection(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].spot_x):
+    case PICA_REG_INDEX(lighting.light[3].spot_z):
+        SyncLightSpotDirection(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].spot_x):
+    case PICA_REG_INDEX(lighting.light[4].spot_z):
+        SyncLightSpotDirection(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].spot_x):
+    case PICA_REG_INDEX(lighting.light[5].spot_z):
+        SyncLightSpotDirection(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].spot_x):
+    case PICA_REG_INDEX(lighting.light[6].spot_z):
+        SyncLightSpotDirection(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].spot_x):
+    case PICA_REG_INDEX(lighting.light[7].spot_z):
+        SyncLightSpotDirection(7);
+        break;
+
+    // Fragment lighting light source config
+    case PICA_REG_INDEX(lighting.light[0].config):
+    case PICA_REG_INDEX(lighting.light[1].config):
+    case PICA_REG_INDEX(lighting.light[2].config):
+    case PICA_REG_INDEX(lighting.light[3].config):
+    case PICA_REG_INDEX(lighting.light[4].config):
+    case PICA_REG_INDEX(lighting.light[5].config):
+    case PICA_REG_INDEX(lighting.light[6].config):
+    case PICA_REG_INDEX(lighting.light[7].config):
+        shader_dirty = true;
+        break;
+
+    // Fragment lighting distance attenuation bias
+    case PICA_REG_INDEX(lighting.light[0].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].dist_atten_bias):
+        SyncLightDistanceAttenuationBias(7);
+        break;
+
+    // Fragment lighting distance attenuation scale
+    case PICA_REG_INDEX(lighting.light[0].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(0);
+        break;
+    case PICA_REG_INDEX(lighting.light[1].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(1);
+        break;
+    case PICA_REG_INDEX(lighting.light[2].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(2);
+        break;
+    case PICA_REG_INDEX(lighting.light[3].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(3);
+        break;
+    case PICA_REG_INDEX(lighting.light[4].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(4);
+        break;
+    case PICA_REG_INDEX(lighting.light[5].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(5);
+        break;
+    case PICA_REG_INDEX(lighting.light[6].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(6);
+        break;
+    case PICA_REG_INDEX(lighting.light[7].dist_atten_scale):
+        SyncLightDistanceAttenuationScale(7);
+        break;
+
+    // Fragment lighting global ambient color (emission + ambient * ambient)
+    case PICA_REG_INDEX(lighting.global_ambient):
+        SyncGlobalAmbient();
+        break;
+
+    // Fragment lighting lookup tables
+    case PICA_REG_INDEX(lighting.lut_data[0]):
+    case PICA_REG_INDEX(lighting.lut_data[1]):
+    case PICA_REG_INDEX(lighting.lut_data[2]):
+    case PICA_REG_INDEX(lighting.lut_data[3]):
+    case PICA_REG_INDEX(lighting.lut_data[4]):
+    case PICA_REG_INDEX(lighting.lut_data[5]):
+    case PICA_REG_INDEX(lighting.lut_data[6]):
+    case PICA_REG_INDEX(lighting.lut_data[7]): {
+        const auto& lut_config = regs.lighting.lut_config;
+        uniform_block_data.lighting_lut_dirty[lut_config.type] = true;
+        uniform_block_data.lighting_lut_dirty_any = true;
+        break;
+    }
+    }
+}
+
+void RasterizerOpenGL::FlushAll() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushAll();
+}
+
+void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+}
+
+void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.InvalidateRegion(addr, size, nullptr);
+}
+
+void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+    res_cache.InvalidateRegion(addr, size, nullptr);
+}
+
+void RasterizerOpenGL::ClearAll(bool flush) {
+    res_cache.ClearAll(flush);
+}
+
+bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
+    MICROPROFILE_SCOPE(OpenGL_Blits);
+
+    SurfaceParams src_params;
+    src_params.addr = config.GetPhysicalInputAddress();
+    src_params.width = config.output_width;
+    src_params.stride = config.input_width;
+    src_params.height = config.output_height;
+    src_params.is_tiled = !config.input_linear;
+    src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format);
+    src_params.UpdateParams();
+
+    SurfaceParams dst_params;
+    dst_params.addr = config.GetPhysicalOutputAddress();
+    dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2
+                                                        : config.output_width.Value();
+    dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2
+                                                         : config.output_height.Value();
+    dst_params.is_tiled = config.input_linear != config.dont_swizzle;
+    dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format);
+    dst_params.UpdateParams();
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) =
+        res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
+    if (src_surface == nullptr)
+        return false;
+
+    dst_params.res_scale = src_surface->res_scale;
+
+    Common::Rectangle<u32> dst_rect;
+    Surface dst_surface;
+    std::tie(dst_surface, dst_rect) =
+        res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false);
+    if (dst_surface == nullptr)
+        return false;
+
+    if (src_surface->is_tiled != dst_surface->is_tiled)
+        std::swap(src_rect.top, src_rect.bottom);
+
+    if (config.flip_vertically)
+        std::swap(src_rect.top, src_rect.bottom);
+
+    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect))
+        return false;
+
+    res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
+    u32 copy_size = Common::AlignDown(config.texture_copy.size, 16);
+    if (copy_size == 0) {
+        return false;
+    }
+
+    u32 input_gap = config.texture_copy.input_gap * 16;
+    u32 input_width = config.texture_copy.input_width * 16;
+    if (input_width == 0 && input_gap != 0) {
+        return false;
+    }
+    if (input_gap == 0 || input_width >= copy_size) {
+        input_width = copy_size;
+        input_gap = 0;
+    }
+    if (copy_size % input_width != 0) {
+        return false;
+    }
+
+    u32 output_gap = config.texture_copy.output_gap * 16;
+    u32 output_width = config.texture_copy.output_width * 16;
+    if (output_width == 0 && output_gap != 0) {
+        return false;
+    }
+    if (output_gap == 0 || output_width >= copy_size) {
+        output_width = copy_size;
+        output_gap = 0;
+    }
+    if (copy_size % output_width != 0) {
+        return false;
+    }
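+
+    // TextureCopy dimensions are in 16-byte units: e.g. texture_copy.input_width = 4
+    // and input_gap = 1 describe 64-byte rows separated by 16-byte gaps, so a 256-byte
+    // copy_size spans 256 / 64 = 4 rows. The checks above reject configurations where
+    // copy_size is not a whole number of rows.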
+
+    SurfaceParams src_params;
+    src_params.addr = config.GetPhysicalInputAddress();
+    src_params.stride = input_width + input_gap; // stride in bytes
+    src_params.width = input_width;              // width in bytes
+    src_params.height = copy_size / input_width;
+    src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width;
+    src_params.end = src_params.addr + src_params.size;
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params);
+    if (src_surface == nullptr) {
+        return false;
+    }
+
+    if (output_gap != 0 &&
+        (output_width != src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) *
+                             (src_surface->is_tiled ? 8 : 1) ||
+         output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) {
+        return false;
+    }
+
+    SurfaceParams dst_params = *src_surface;
+    dst_params.addr = config.GetPhysicalOutputAddress();
+    dst_params.width = src_rect.GetWidth() / src_surface->res_scale;
+    dst_params.stride = dst_params.width + src_surface->PixelsInBytes(
+                                               src_surface->is_tiled ? output_gap / 8 : output_gap);
+    dst_params.height = src_rect.GetHeight() / src_surface->res_scale;
+    dst_params.res_scale = src_surface->res_scale;
+    dst_params.UpdateParams();
+
+    // Since we are going to invalidate the gap if there is one, we will have to load it first
+    const bool load_gap = output_gap != 0;
+    Common::Rectangle<u32> dst_rect;
+    Surface dst_surface;
+    std::tie(dst_surface, dst_rect) =
+        res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap);
+    if (dst_surface == nullptr) {
+        return false;
+    }
+
+    if (dst_surface->type == SurfaceType::Texture) {
+        return false;
+    }
+
+    if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
+        return false;
+    }
+
+    res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
+    Surface dst_surface = res_cache.GetFillSurface(config);
+    if (dst_surface == nullptr)
+        return false;
+
+    res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface);
+    return true;
+}
+
+bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
+                                         PAddr framebuffer_addr, u32 pixel_stride,
+                                         ScreenInfo& screen_info) {
+    if (framebuffer_addr == 0) {
+        return false;
+    }
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+
+    SurfaceParams src_params;
+    src_params.addr = framebuffer_addr;
+    src_params.width = std::min(config.width.Value(), pixel_stride);
+    src_params.height = config.height;
+    src_params.stride = pixel_stride;
+    src_params.is_tiled = false;
+    src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format);
+    src_params.UpdateParams();
+
+    Common::Rectangle<u32> src_rect;
+    Surface src_surface;
+    std::tie(src_surface, src_rect) =
+        res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
+
+    if (src_surface == nullptr) {
+        return false;
+    }
+
+    u32 scaled_width = src_surface->GetScaledWidth();
+    u32 scaled_height = src_surface->GetScaledHeight();
+
+    screen_info.display_texcoords = Common::Rectangle<float>(
+        (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
+        (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
+
+    screen_info.display_texture = src_surface->texture.handle;
+
+    return true;
+}
+
+void RasterizerOpenGL::SamplerInfo::Create() {
+    sampler.Create();
+    mag_filter = min_filter = mip_filter = TextureConfig::Linear;
+    wrap_s = wrap_t = TextureConfig::Repeat;
+    border_color = 0;
+    lod_min = lod_max = 0;
+    lod_bias = 0;
+
+    // The GL defaults for GL_TEXTURE_MAX_LOD and GL_TEXTURE_MIN_LOD are 1000 and -1000;
+    // pin both to 0 so they match the tracked state above.
+    // Other attributes have correct defaults
+    glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
+    glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
+}
+
+void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
+    const Pica::TexturingRegs::TextureConfig& config) {
+
+    GLuint s = sampler.handle;
+
+    if (mag_filter != config.mag_filter) {
+        mag_filter = config.mag_filter;
+        glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureMagFilterMode(mag_filter));
+    }
+
+    // TODO(wwylele): remove new_supress_mipmap_for_cube logic once mipmap for cube is implemented
+    bool new_supress_mipmap_for_cube =
+        config.type == Pica::TexturingRegs::TextureConfig::TextureCube;
+    if (min_filter != config.min_filter || mip_filter != config.mip_filter ||
+        supress_mipmap_for_cube != new_supress_mipmap_for_cube) {
+        min_filter = config.min_filter;
+        mip_filter = config.mip_filter;
+        supress_mipmap_for_cube = new_supress_mipmap_for_cube;
+        if (new_supress_mipmap_for_cube) {
+            // HACK: use mag filter converter for min filter because they are the same anyway
+            glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+                                PicaToGL::TextureMagFilterMode(min_filter));
+        } else {
+            glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+                                PicaToGL::TextureMinFilterMode(min_filter, mip_filter));
+        }
+    }
+
+    if (wrap_s != config.wrap_s) {
+        wrap_s = config.wrap_s;
+        glSamplerParameteri(s, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s));
+    }
+    if (wrap_t != config.wrap_t) {
+        wrap_t = config.wrap_t;
+        glSamplerParameteri(s, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t));
+    }
+
+    if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
+        if (border_color != config.border_color.raw) {
+            border_color = config.border_color.raw;
+            auto gl_color = PicaToGL::ColorRGBA8(border_color);
+            glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
+        }
+    }
+
+    if (lod_min != config.lod.min_level) {
+        lod_min = config.lod.min_level;
+        glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
+    }
+
+    if (lod_max != config.lod.max_level) {
+        lod_max = config.lod.max_level;
+        glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
+    }
+
+    if (!GLES && lod_bias != config.lod.bias) {
+        lod_bias = config.lod.bias;
+        glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias / 256.0f);
+    }
+}
+
+void RasterizerOpenGL::SetShader() {
+    shader_program_manager->UseFragmentShader(Pica::g_state.regs);
+}
+
+void RasterizerOpenGL::SyncClipEnabled() {
+    state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0;
+}
+
+void RasterizerOpenGL::SyncClipCoef() {
+    const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef();
+    const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),
+                                  raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()};
+    if (new_clip_coef != uniform_block_data.data.clip_coef) {
+        uniform_block_data.data.clip_coef = new_clip_coef;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncCullMode() {
+    const auto& regs = Pica::g_state.regs;
+
+    switch (regs.rasterizer.cull_mode) {
+    case Pica::RasterizerRegs::CullMode::KeepAll:
+        state.cull.enabled = false;
+        break;
+
+    case Pica::RasterizerRegs::CullMode::KeepClockWise:
+        state.cull.enabled = true;
+        state.cull.front_face = GL_CW;
+        break;
+
+    case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
+        state.cull.enabled = true;
+        state.cull.front_face = GL_CCW;
+        break;
+
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}",
+                     static_cast<u32>(regs.rasterizer.cull_mode.Value()));
+        UNIMPLEMENTED();
+        break;
+    }
+}
+
+void RasterizerOpenGL::SyncDepthScale() {
+    float depth_scale =
+        Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
+    if (depth_scale != uniform_block_data.data.depth_scale) {
+        uniform_block_data.data.depth_scale = depth_scale;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncDepthOffset() {
+    float depth_offset =
+        Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
+    if (depth_offset != uniform_block_data.data.depth_offset) {
+        uniform_block_data.data.depth_offset = depth_offset;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncBlendEnabled() {
+    state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
+}
+
+void RasterizerOpenGL::SyncBlendFuncs() {
+    const auto& regs = Pica::g_state.regs;
+    state.blend.rgb_equation =
+        PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
+    state.blend.a_equation =
+        PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
+    state.blend.src_rgb_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
+    state.blend.dst_rgb_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
+    state.blend.src_a_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a);
+    state.blend.dst_a_func =
+        PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
+}
+
+void RasterizerOpenGL::SyncBlendColor() {
+    auto blend_color =
+        PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
+    state.blend.color.red = blend_color[0];
+    state.blend.color.green = blend_color[1];
+    state.blend.color.blue = blend_color[2];
+    state.blend.color.alpha = blend_color[3];
+}
+
+void RasterizerOpenGL::SyncFogColor() {
+    const auto& regs = Pica::g_state.regs;
+    uniform_block_data.data.fog_color = {
+        regs.texturing.fog_color.r.Value() / 255.0f,
+        regs.texturing.fog_color.g.Value() / 255.0f,
+        regs.texturing.fog_color.b.Value() / 255.0f,
+    };
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncProcTexNoise() {
+    const auto& regs = Pica::g_state.regs.texturing;
+    uniform_block_data.data.proctex_noise_f = {
+        Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
+        Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
+    };
+    uniform_block_data.data.proctex_noise_a = {
+        regs.proctex_noise_u.amplitude / 4095.0f,
+        regs.proctex_noise_v.amplitude / 4095.0f,
+    };
+    uniform_block_data.data.proctex_noise_p = {
+        Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
+        Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
+    };
+
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncProcTexBias() {
+    const auto& regs = Pica::g_state.regs.texturing;
+    uniform_block_data.data.proctex_bias =
+        Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
+            .ToFloat32();
+
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncAlphaTest() {
+    const auto& regs = Pica::g_state.regs;
+    if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
+        uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLogicOp() {
+    const auto& regs = Pica::g_state.regs;
+    state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
+
+    if (GLES) {
+        if (!regs.framebuffer.output_merger.alphablend_enable) {
+            if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
+                // Color output is disabled by logic operation. We use color write mask to skip
+                // color but allow depth write.
+                state.color_mask = {};
+            }
+        }
+    }
+}
+
+void RasterizerOpenGL::SyncColorWriteMask() {
+    const auto& regs = Pica::g_state.regs;
+    if (GLES) {
+        if (!regs.framebuffer.output_merger.alphablend_enable) {
+            if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
+                // Color output is disabled by logic operation. We use color write mask to skip
+                // color but allow depth write. Return early to avoid overwriting this.
+                return;
+            }
+        }
+    }
+
+    auto IsColorWriteEnabled = [&](u32 value) {
+        return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
+                                                                                   : GL_FALSE;
+    };
+
+    state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable);
+    state.color_mask.green_enabled =
+        IsColorWriteEnabled(regs.framebuffer.output_merger.green_enable);
+    state.color_mask.blue_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.blue_enable);
+    state.color_mask.alpha_enabled =
+        IsColorWriteEnabled(regs.framebuffer.output_merger.alpha_enable);
+}
+
+void RasterizerOpenGL::SyncStencilWriteMask() {
+    const auto& regs = Pica::g_state.regs;
+    state.stencil.write_mask =
+        (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
+            ? static_cast<GLuint>(regs.framebuffer.output_merger.stencil_test.write_mask)
+            : 0;
+}
+
+void RasterizerOpenGL::SyncDepthWriteMask() {
+    const auto& regs = Pica::g_state.regs;
+    state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
+                              regs.framebuffer.output_merger.depth_write_enable)
+                                 ? GL_TRUE
+                                 : GL_FALSE;
+}
+
+void RasterizerOpenGL::SyncStencilTest() {
+    const auto& regs = Pica::g_state.regs;
+    state.stencil.test_enabled =
+        regs.framebuffer.output_merger.stencil_test.enable &&
+        regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
+    state.stencil.test_func =
+        PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func);
+    state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value;
+    state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask;
+    state.stencil.action_stencil_fail =
+        PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail);
+    state.stencil.action_depth_fail =
+        PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail);
+    state.stencil.action_depth_pass =
+        PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass);
+}
+
+void RasterizerOpenGL::SyncDepthTest() {
+    const auto& regs = Pica::g_state.regs;
+    state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 ||
+                               regs.framebuffer.output_merger.depth_write_enable == 1;
+    state.depth.test_func =
+        regs.framebuffer.output_merger.depth_test_enable == 1
+            ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func)
+            : GL_ALWAYS;
+}
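+
+// OpenGL only writes the depth buffer while GL_DEPTH_TEST is enabled, so when the PICA
+// asks for "write without testing" the test is forced on above with GL_ALWAYS as the
+// compare function.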
+            ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func)
+            : GL_ALWAYS;
+}
+
+void RasterizerOpenGL::SyncCombinerColor() {
+    auto combiner_color =
+        PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
+    if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
+        uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index,
+                                         const Pica::TexturingRegs::TevStageConfig& tev_stage) {
+    const auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
+
+    if (const_color == uniform_block_data.data.const_color[stage_index]) {
+        return;
+    }
+
+    uniform_block_data.data.const_color[stage_index] = const_color;
+    uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncGlobalAmbient() {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
+    if (color != uniform_block_data.data.lighting_global_ambient) {
+        uniform_block_data.data.lighting_global_ambient = color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
+    if (color != uniform_block_data.data.light_src[light_index].specular_0) {
+        uniform_block_data.data.light_src[light_index].specular_0 = color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
+    if (color != uniform_block_data.data.light_src[light_index].specular_1) {
+        uniform_block_data.data.light_src[light_index].specular_1 = color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
+    if (color != uniform_block_data.data.light_src[light_index].diffuse) {
+        uniform_block_data.data.light_src[light_index].diffuse = color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightAmbient(int light_index) {
+    auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
+    if (color != uniform_block_data.data.light_src[light_index].ambient) {
+        uniform_block_data.data.light_src[light_index].ambient = color;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightPosition(int light_index) {
+    GLvec3 position = {
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
+        Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
+
+    if (position != uniform_block_data.data.light_src[light_index].position) {
+        uniform_block_data.data.light_src[light_index].position = position;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightSpotDirection(int light_index) {
+    const auto& light = Pica::g_state.regs.lighting.light[light_index];
+    GLvec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f,
+                             light.spot_z / 2047.0f};
+
+    if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
+        uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
+    GLfloat dist_atten_bias =
+        Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
+            .ToFloat32();
+
+    if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
+        uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
+    GLfloat dist_atten_scale =
+        Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
+            .ToFloat32();
+
+    if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
+        uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncShadowBias() {
+    const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
+    GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
+    GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
+
+    if (constant != uniform_block_data.data.shadow_bias_constant ||
+        linear != uniform_block_data.data.shadow_bias_linear) {
+        uniform_block_data.data.shadow_bias_constant = constant;
+        uniform_block_data.data.shadow_bias_linear = linear;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncShadowTextureBias() {
+    GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
+    if (bias != uniform_block_data.data.shadow_texture_bias) {
+        uniform_block_data.data.shadow_texture_bias = bias;
+        uniform_block_data.dirty = true;
+    }
+}
+
+void RasterizerOpenGL::SyncAndUploadLUTsLF() {
+    constexpr std::size_t max_size =
+        sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog
+
+    if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
+        return;
+    }
+
+    u8* buffer;
+    GLintptr offset;
+    bool invalidate;
+    std::size_t bytes_used = 0;
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4));
+
+    // Sync the lighting luts
+    if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
+        for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) {
+            if (uniform_block_data.lighting_lut_dirty[index] || invalidate) {
+                std::array<GLvec2, 256> new_data;
+                const auto& source_lut = Pica::g_state.lighting.luts[index];
+                std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
+                               [](const auto& entry) {
+                                   return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+                               });
+
+                if (new_data != lighting_lut_data[index] || invalidate) {
+                    lighting_lut_data[index] = new_data;
+                    std::memcpy(buffer + bytes_used, new_data.data(),
+                                new_data.size() * sizeof(GLvec2));
+                    uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
+                        static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+                    uniform_block_data.dirty = true;
+                    bytes_used += new_data.size() * sizeof(GLvec2);
+                }
+                uniform_block_data.lighting_lut_dirty[index] = false;
+            }
+        }
+        uniform_block_data.lighting_lut_dirty_any = false;
+    }
+
+    // Sync the fog lut
+    if (uniform_block_data.fog_lut_dirty || invalidate) {
+        std::array<GLvec2, 128> new_data;
+
+        std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(),
+                       new_data.begin(), [](const auto& entry) {
+                           return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+                       });
+
+        if (new_data != fog_lut_data || invalidate) {
+            fog_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
+            uniform_block_data.data.fog_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec2);
+        }
+        uniform_block_data.fog_lut_dirty = false;
+    }
+
+    texture_lf_buffer.Unmap(bytes_used);
+}
+
+void RasterizerOpenGL::SyncAndUploadLUTs() {
+    constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
+                                     sizeof(GLvec4) * 256 +     // proctex
+                                     sizeof(GLvec4) * 256;      // proctex diff
+
+    if (!uniform_block_data.proctex_noise_lut_dirty &&
+        !uniform_block_data.proctex_color_map_dirty &&
+        !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
+        !uniform_block_data.proctex_diff_lut_dirty) {
+        return;
+    }
+
+    u8* buffer;
+    GLintptr offset;
+    bool invalidate;
+    std::size_t bytes_used = 0;
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
+
+    // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
+    auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
+                                   const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
+                                   std::array<GLvec2, 128>& lut_data, GLint& lut_offset) {
+        std::array<GLvec2, 128> new_data;
+        std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
+            return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+        });
+
+        if (new_data != lut_data || invalidate) {
+            lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
+            lut_offset = static_cast<GLint>((offset + bytes_used) / sizeof(GLvec2));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec2);
+        }
+    };
+
+    // Sync the proctex noise lut
+    if (uniform_block_data.proctex_noise_lut_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
+                            uniform_block_data.data.proctex_noise_lut_offset);
+        uniform_block_data.proctex_noise_lut_dirty = false;
+    }
+
+    // Sync the proctex color map
+    if (uniform_block_data.proctex_color_map_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
+                            uniform_block_data.data.proctex_color_map_offset);
+        uniform_block_data.proctex_color_map_dirty = false;
+    }
+
+    // Sync the proctex alpha map
+    if (uniform_block_data.proctex_alpha_map_dirty || invalidate) {
+        SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
+                            uniform_block_data.data.proctex_alpha_map_offset);
+        uniform_block_data.proctex_alpha_map_dirty = false;
+    }
+
+    // Sync the proctex lut
+    if (uniform_block_data.proctex_lut_dirty || invalidate) {
+        std::array<GLvec4, 256> new_data;
+
+        std::transform(Pica::g_state.proctex.color_table.begin(),
+                       Pica::g_state.proctex.color_table.end(), new_data.begin(),
+                       [](const auto& entry) {
+                           auto rgba = entry.ToVector() / 255.0f;
+                           return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
+                       });
+
+        if (new_data != proctex_lut_data || invalidate) {
+            proctex_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
+            uniform_block_data.data.proctex_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec4));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec4);
+        }
+        uniform_block_data.proctex_lut_dirty = false;
+    }
+
+    // Sync the proctex difference lut
+    if (uniform_block_data.proctex_diff_lut_dirty || invalidate) {
+        std::array<GLvec4, 256> new_data;
+
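+        // As with the color table above, each entry is unpacked to normalized RGBA
+        // floats; the difference table supplies per-entry deltas that can be used
+        // to interpolate between adjacent proctex color LUT entries.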
+        std::transform(Pica::g_state.proctex.color_diff_table.begin(),
+                       Pica::g_state.proctex.color_diff_table.end(), new_data.begin(),
+                       [](const auto& entry) {
+                           auto rgba = entry.ToVector() / 255.0f;
+                           return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
+                       });
+
+        if (new_data != proctex_diff_lut_data || invalidate) {
+            proctex_diff_lut_data = new_data;
+            std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
+            uniform_block_data.data.proctex_diff_lut_offset =
+                static_cast<GLint>((offset + bytes_used) / sizeof(GLvec4));
+            uniform_block_data.dirty = true;
+            bytes_used += new_data.size() * sizeof(GLvec4);
+        }
+        uniform_block_data.proctex_diff_lut_dirty = false;
+    }
+
+    texture_buffer.Unmap(bytes_used);
+}
+
+void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
+    // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
+    // first
+    state.draw.uniform_buffer = uniform_buffer.GetHandle();
+    state.Apply();
+
+    bool sync_vs = accelerate_draw;
+    bool sync_fs = uniform_block_data.dirty;
+
+    if (!sync_vs && !sync_fs)
+        return;
+
+    std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
+    std::size_t used_bytes = 0;
+    u8* uniforms;
+    GLintptr offset;
+    bool invalidate;
+    std::tie(uniforms, offset, invalidate) =
+        uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
+
+    if (sync_vs) {
+        VSUniformData vs_uniforms;
+        vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
+        std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
+        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS),
+                          uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData));
+        used_bytes += uniform_size_aligned_vs;
+    }
+
+    if (sync_fs || invalidate) {
+        std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData));
+        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common),
+                          uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData));
+        uniform_block_data.dirty = false;
+        used_bytes += uniform_size_aligned_fs;
+    }
+
+    uniform_buffer.Unmap(used_bytes);
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
new file mode 100644
index 000000000..9c11dca6e
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -0,0 +1,338 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
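+//
+// Declares RasterizerVulkan, the PICA rasterizer backed by Vulkan. Note that a
+// number of members below still use OpenGL types (OpenGLState, OGLStreamBuffer,
+// OGLTexture, ...) carried over from the OpenGL rasterizer while the port to
+// Vulkan equivalents is in progress.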
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/vector_math.h"
+#include "core/hw/gpu.h"
+#include "video_core/pica_state.h"
+#include "video_core/pica_types.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/regs_framebuffer.h"
+#include "video_core/regs_lighting.h"
+#include "video_core/regs_rasterizer.h"
+#include "video_core/regs_texturing.h"
+#include "video_core/shader/shader.h"
+
+namespace Frontend {
+class EmuWindow;
+}
+
+namespace Vulkan {
+class ShaderProgramManager;
+
+class RasterizerVulkan : public VideoCore::RasterizerInterface {
+public:
+    explicit RasterizerVulkan(Frontend::EmuWindow& emu_window);
+    ~RasterizerVulkan() override;
+
+    void LoadDiskResources(const std::atomic_bool& stop_loading,
+                           const VideoCore::DiskResourceLoadCallback& callback) override;
+
+    void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
+                     const Pica::Shader::OutputVertex& v2) override;
+    void DrawTriangles() override;
+    void NotifyPicaRegisterChanged(u32 id) override;
+    void FlushAll() override;
+    void FlushRegion(PAddr addr, u32 size) override;
+    void InvalidateRegion(PAddr addr, u32 size) override;
+    void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
+    void ClearAll(bool flush) override;
+    bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
+    bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
+    bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
+    bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
+                           u32 pixel_stride, OpenGL::ScreenInfo& screen_info) override;
+    bool AccelerateDrawBatch(bool is_indexed) override;
+
+    /// Syncs entire status to match PICA registers
+    void SyncEntireState() override;
+
+private:
+    struct SamplerInfo {
+        using TextureConfig = Pica::TexturingRegs::TextureConfig;
+
+        OGLSampler sampler;
+
+        /// Creates the sampler object, initializing its state so that it's in sync with the
+        /// SamplerInfo struct.
+        void Create();
+        /// Syncs the sampler object with the config, updating any necessary state.
+        void SyncWithConfig(const TextureConfig& config);
+
+    private:
+        TextureConfig::TextureFilter mag_filter;
+        TextureConfig::TextureFilter min_filter;
+        TextureConfig::TextureFilter mip_filter;
+        TextureConfig::WrapMode wrap_s;
+        TextureConfig::WrapMode wrap_t;
+        u32 border_color;
+        u32 lod_min;
+        u32 lod_max;
+        s32 lod_bias;
+
+        // TODO(wwylele): remove this once mipmap for cube is implemented
+        bool supress_mipmap_for_cube = false;
+    };
+
+    struct VertexInfo {
+        VertexInfo() = default;
+        VertexInfo(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
+            position[0] = v.pos.x.ToFloat32();
+            position[1] = v.pos.y.ToFloat32();
+            position[2] = v.pos.z.ToFloat32();
+            position[3] = v.pos.w.ToFloat32();
+            color[0] = v.color.x.ToFloat32();
+            color[1] = v.color.y.ToFloat32();
+            color[2] = v.color.z.ToFloat32();
+            color[3] = v.color.w.ToFloat32();
+            tex_coord0[0] = v.tc0.x.ToFloat32();
+            tex_coord0[1] = v.tc0.y.ToFloat32();
+            tex_coord1[0] = v.tc1.x.ToFloat32();
+            tex_coord1[1] = v.tc1.y.ToFloat32();
+            tex_coord2[0] = v.tc2.x.ToFloat32();
+            tex_coord2[1] = v.tc2.y.ToFloat32();
+            tex_coord0_w = v.tc0_w.ToFloat32();
+            normquat[0] = v.quat.x.ToFloat32();
+            normquat[1] = v.quat.y.ToFloat32();
+            normquat[2] = v.quat.z.ToFloat32();
+            normquat[3] = v.quat.w.ToFloat32();
+            view[0] = v.view.x.ToFloat32();
+            view[1] = v.view.y.ToFloat32();
+            view[2] = v.view.z.ToFloat32();
+
+            if (flip_quaternion) {
+                normquat = -normquat;
+            }
+        }
+
+        glm::vec4 position;
+        glm::vec4 color;
+        glm::vec2 tex_coord0;
+        glm::vec2 tex_coord1;
+        glm::vec2 tex_coord2;
+        float tex_coord0_w;
+        glm::vec4 normquat;
+        glm::vec3 view;
+    };
+
+    /// Structure that the hardware rendered vertices are composed of
+    struct HardwareVertex : public VertexInfo {
+        HardwareVertex() = default;
+        HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion)
+            : VertexInfo(v, flip_quaternion) {}
+
+        static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
+        static constexpr std::array<vk::VertexInputAttributeDescription, 8> attribute_desc = {
+            vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, position)),
+            vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, color)),
+            vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord0)),
+            vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord1)),
+            vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord2)),
+            vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexInfo, tex_coord0_w)),
+            vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, normquat)),
+            vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, view)),
+        };
+    };
+
+    /// Syncs the clip enabled status to match the PICA register
+    void SyncClipEnabled();
+
+    /// Syncs the clip coefficients to match the PICA register
+    void SyncClipCoef();
+
+    /// Sets the OpenGL shader in accordance with the current PICA register state
+    void SetShader();
+
+    /// Syncs the cull mode to match the PICA register
+    void SyncCullMode();
+
+    /// Syncs the depth scale to match the PICA register
+    void SyncDepthScale();
+
+    /// Syncs the depth offset to match the PICA register
+    void SyncDepthOffset();
+
+    /// Syncs the blend enabled status to match the PICA register
+    void SyncBlendEnabled();
+
+    /// Syncs the blend functions to match the PICA register
+    void SyncBlendFuncs();
+
+    /// Syncs the blend color to match the PICA register
+    void SyncBlendColor();
+
+    /// Syncs the fog states to match the PICA register
+    void SyncFogColor();
+
+    /// Sync the procedural texture noise configuration to match the PICA register
+    void SyncProcTexNoise();
+
+    /// Sync the procedural texture bias configuration to match the PICA register
+    void SyncProcTexBias();
+
+    /// Syncs the alpha test states to match the PICA register
+    void SyncAlphaTest();
+
+    /// Syncs the logic op states to match the PICA register
+    void SyncLogicOp();
+
+    /// Syncs the color write mask to match the PICA register state
+    void SyncColorWriteMask();
+
+    /// Syncs the stencil write mask to match the PICA register state
+    void SyncStencilWriteMask();
+
+    /// Syncs the depth write mask to match the PICA register state
+    void SyncDepthWriteMask();
+
+    /// Syncs the stencil test states to match the PICA register
+    void SyncStencilTest();
+
+    /// Syncs the depth test states to match the PICA register
+    void SyncDepthTest();
+
+    /// Syncs the TEV combiner color buffer to match the PICA register
+    void SyncCombinerColor();
+
+    /// Syncs the TEV constant color to match the PICA register
+    void SyncTevConstColor(std::size_t tev_index,
+                           const Pica::TexturingRegs::TevStageConfig& tev_stage);
+
+    /// Syncs the lighting global ambient color to match the PICA register
+    void SyncGlobalAmbient();
+
+    /// Syncs the specified light's specular 0 color to match the PICA register
+    void SyncLightSpecular0(int light_index);
+
+    /// Syncs the specified light's specular 1 color to match the PICA register
+    void SyncLightSpecular1(int light_index);
+
+    /// Syncs the specified light's diffuse color to match the PICA register
+    void SyncLightDiffuse(int light_index);
+
+    /// Syncs the specified light's ambient color to match the PICA register
+    void SyncLightAmbient(int light_index);
+
+    /// Syncs the specified light's position to match the PICA register
+    void SyncLightPosition(int light_index);
+
+    /// Syncs the specified spot light direction to match the PICA register
+    void SyncLightSpotDirection(int light_index);
+
+    /// Syncs the specified light's distance attenuation bias to match the PICA register
+    void SyncLightDistanceAttenuationBias(int light_index);
+
+    /// Syncs the specified light's distance attenuation scale to match the PICA register
+    void SyncLightDistanceAttenuationScale(int light_index);
+
+    /// Syncs the shadow rendering bias to match the PICA register
+    void SyncShadowBias();
+
+    /// Syncs the shadow texture bias to match the PICA register
+    void SyncShadowTextureBias();
+
+    /// Syncs and uploads the lighting, fog and proctex LUTs
+    void SyncAndUploadLUTs();
+    void SyncAndUploadLUTsLF();
+
+    /// Upload the uniform blocks to the uniform buffer object
+    void UploadUniforms(bool accelerate_draw);
+
+    /// Generic draw function for DrawTriangles and AccelerateDrawBatch
+    bool Draw(bool accelerate, bool is_indexed);
+
+    /// Internal implementation for AccelerateDrawBatch
+    bool AccelerateDrawBatchInternal(bool is_indexed);
+
+    struct VertexArrayInfo {
+        u32 vs_input_index_min;
+        u32 vs_input_index_max;
+        u32 vs_input_size;
+    };
+
+    /// Retrieve the range and the size of the input vertex
+    VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
+
+    /// Setup vertex shader for AccelerateDrawBatch
+    bool SetupVertexShader();
+
+    /// Setup geometry shader for AccelerateDrawBatch
+    bool SetupGeometryShader();
+
+    bool is_amd;
+
+    OpenGLState state;
+    GLuint default_texture;
+
+    RasterizerCacheOpenGL res_cache;
+
+    std::vector<HardwareVertex> vertex_batch;
+
+    bool shader_dirty = true;
+
+    struct {
+        UniformData data;
+        std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
+        bool lighting_lut_dirty_any;
+        bool fog_lut_dirty;
+        bool proctex_noise_lut_dirty;
+        bool proctex_color_map_dirty;
+        bool proctex_alpha_map_dirty;
+        bool proctex_lut_dirty;
+        bool proctex_diff_lut_dirty;
+        bool dirty;
+    } uniform_block_data = {};
+
+    std::unique_ptr<ShaderProgramManager> shader_program_manager;
+
+    // They shall be big enough for about one frame.
+    static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
+    static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
+    static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
+    static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
+
+    OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
+    std::array<bool, 16> hw_vao_enabled_attributes{};
+
+    std::array<SamplerInfo, 3> texture_samplers;
+    OGLStreamBuffer vertex_buffer;
+    OGLStreamBuffer uniform_buffer;
+    OGLStreamBuffer index_buffer;
+    OGLStreamBuffer texture_buffer;
+    OGLStreamBuffer texture_lf_buffer;
+    OGLFramebuffer framebuffer;
+    GLint uniform_buffer_alignment;
+    std::size_t uniform_size_aligned_vs;
+    std::size_t uniform_size_aligned_fs;
+
+    SamplerInfo texture_cube_sampler;
+
+    OGLTexture texture_buffer_lut_lf;
+    OGLTexture texture_buffer_lut_rg;
+    OGLTexture texture_buffer_lut_rgba;
+
+    std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
+    std::array<GLvec2, 128> fog_lut_data{};
+    std::array<GLvec2, 128> proctex_noise_lut_data{};
+    std::array<GLvec2, 128> proctex_color_map_data{};
+    std::array<GLvec2, 128> proctex_alpha_map_data{};
+    std::array<GLvec4, 256> proctex_lut_data{};
+    std::array<GLvec4, 256> proctex_diff_lut_data{};
+
+    bool allow_shadow;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
new file mode 100644
index 000000000..81145ae59
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
@@ -0,0 +1,1875 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
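+//
+// The rasterizer cache tracks surfaces backed by guest memory (color, depth,
+// texture and fill regions), converts between the PICA's 8x8 morton-order
+// tiling and linear host textures, and services copies, blits and fills
+// between cached surfaces.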
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common/alignment.h"
+#include "common/bit_field.h"
+#include "common/color.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/texture.h"
+#include "common/vector_math.h"
+#include "core/core.h"
+#include "core/frontend/emu_window.h"
+#include "core/hle/kernel/process.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "video_core/pica_state.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
+#include "video_core/renderer_vulkan/vk_state.h"
+#include "video_core/utils.h"
+#include "video_core/video_core.h"
+
+namespace Vulkan {
+
+using SurfaceType = SurfaceParams::SurfaceType;
+using PixelFormat = SurfaceParams::PixelFormat;
+
+const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+    const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
+    if (type == SurfaceType::Color) {
+        ASSERT(static_cast<std::size_t>(pixel_format) < fb_format_tuples.size());
+        return fb_format_tuples[static_cast<std::size_t>(pixel_format)];
+    } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
+        std::size_t tuple_idx = static_cast<std::size_t>(pixel_format) - 14;
+        ASSERT(tuple_idx < depth_format_tuples.size());
+        return depth_format_tuples[tuple_idx];
+    }
+    return tex_tuple;
+}
+
+template <typename Map, typename Interval>
+static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gpu_buffer) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+    constexpr u32 vk_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    for (u32 y = 0; y < 8; ++y) {
+        for (u32 x = 0; x < 8; ++x) {
+            u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
+            u8* gpu_ptr = gpu_buffer + ((7 - y) * stride + x) * vk_bytes_per_pixel;
+            if constexpr (morton_to_gl) {
+                if constexpr (format == PixelFormat::D24S8) {
+                    gpu_ptr[0] = tile_ptr[3];
+                    std::memcpy(gpu_ptr + 1, tile_ptr, 3);
+                } else {
+                    std::memcpy(gpu_ptr, tile_ptr, bytes_per_pixel);
+                }
+            } else {
+                if constexpr (format == PixelFormat::D24S8) {
+                    std::memcpy(tile_ptr, gpu_ptr + 1, 3);
+                    tile_ptr[3] = gpu_ptr[0];
+                } else {
+                    std::memcpy(tile_ptr, gpu_ptr, bytes_per_pixel);
+                }
+            }
+        }
+    }
+}
+
+template <bool morton_to_gl, PixelFormat format>
+static void MortonCopy(u32 stride, u32 height, u8* gpu_buffer, PAddr base, PAddr start, PAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+    constexpr u32 tile_size = bytes_per_pixel * 64;
+
+    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    static_assert(gl_bytes_per_pixel >= bytes_per_pixel, "");
+    gpu_buffer += gl_bytes_per_pixel - bytes_per_pixel;
+
+    const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
+    const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
+    const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
+
+    ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));
+
+    const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
+    u32 x = (begin_pixel_index % (stride * 8)) / 8;
+    u32 y = (begin_pixel_index / (stride * 8)) * 8;
+
+    gpu_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel;
+
+    auto gpubuf_next_tile = [&] {
+        x = (x + 8) % stride;
+        gpu_buffer += 8 * gl_bytes_per_pixel;
+        if (!x) {
+            y += 8;
+            gpu_buffer -= stride * 9 * gl_bytes_per_pixel;
+        }
+    };
+
+    u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
+
+    if (start < aligned_start && !morton_to_gl) {
+        std::array<u8, tile_size> tmp_buf;
+        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gpu_buffer);
+        std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
+                    std::min(aligned_start, end) - start);
+
+        tile_buffer += aligned_start - start;
+        gpubuf_next_tile();
+    }
+
+    const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
+    PAddr current_paddr = aligned_start;
+    while (tile_buffer < buffer_end) {
+        // Pokemon Super Mystery Dungeon will try to use textures that go beyond
+        // the end address of VRAM. Stop reading if it reaches an invalid address
+        if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
+            !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
+            LOG_ERROR(Render_Vulkan, "Out of bound texture");
+            break;
+        }
+        MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gpu_buffer);
+        tile_buffer += tile_size;
+        current_paddr += tile_size;
+        gpubuf_next_tile();
+    }
+
+    if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
+        std::array<u8, tile_size> tmp_buf;
+        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gpu_buffer);
+        std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
+    }
+}
+
+static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gpu_fns = {
+    MortonCopy<true, PixelFormat::RGBA8>,  // 0
+    MortonCopy<true, PixelFormat::RGB8>,   // 1
+    MortonCopy<true, PixelFormat::RGB5A1>, // 2
+    MortonCopy<true, PixelFormat::RGB565>, // 3
+    MortonCopy<true, PixelFormat::RGBA4>,  // 4
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,                               // 5 - 13
+    MortonCopy<true, PixelFormat::D16>,    // 14
+    nullptr,                               // 15
+    MortonCopy<true, PixelFormat::D24>,    // 16
+    MortonCopy<true, PixelFormat::D24S8>   // 17
+};
+
+static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gpu_to_morton_fns = {
+    MortonCopy<false, PixelFormat::RGBA8>,  // 0
+    MortonCopy<false, PixelFormat::RGB8>,   // 1
+    MortonCopy<false, PixelFormat::RGB5A1>, // 2
+    MortonCopy<false, PixelFormat::RGB565>, // 3
+    MortonCopy<false, PixelFormat::RGBA4>,  // 4
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,                                // 5 - 13
+    MortonCopy<false, PixelFormat::D16>,    // 14
+    nullptr,                                // 15
+    MortonCopy<false, PixelFormat::D24>,    // 16
+    MortonCopy<false, PixelFormat::D24S8>   // 17
+};
+
+// Allocate an uninitialized texture of appropriate size and format for the surface
+VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 width, u32 height)
+{
+    // First check if the texture can be recycled
+    auto recycled_tex = host_texture_recycler.find({format, width, height});
+    if (recycled_tex != host_texture_recycler.end()) {
+        VKTexture texture = std::move(recycled_tex->second);
+        host_texture_recycler.erase(recycled_tex);
+        return texture;
+    }
+
+    // Otherwise create a brand new texture
+    u32 levels = std::log2(std::max(width, height)) + 1;
+    VKTexture::Info texture_info = {
+        .width = width,
+        .height = height,
+        .format = format,
+        .type = vk::ImageType::e2D,
+        .view_type = vk::ImageViewType::e2D,
+        .mipmap_levels = levels
+    };
+
+    VKTexture texture;
+    texture.Create(texture_info);
+
+    return texture;
+}
+
+static bool BlitTextures(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint dst_tex,
+                         const Common::Rectangle<u32>& dst_rect, SurfaceType type,
+                         GLuint read_fb_handle, GLuint draw_fb_handle) {
+    OpenGLState prev_state = OpenGLState::GetCurState();
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    OpenGLState state;
+    state.draw.read_framebuffer = read_fb_handle;
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    u32 buffers = 0;
+
+    if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
+                               0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
+                               0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+
+        buffers = GL_COLOR_BUFFER_BIT;
+    } else if (type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT;
+    } else if (type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               src_tex, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_tex, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+    }
+
+    // TODO (wwylele): use GL_NEAREST for shadow map texture
+    // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
+    // doing linear interpolation componentwise would cause incorrect values. However, for a
+    // well-programmed game this code path should be rarely executed for shadow map with
+    // inconsistent scale.
+    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
+                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
+                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+
+    return true;
+}
+
+static bool FillSurface(const Surface& surface, const u8* fill_data,
+                        const Common::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) {
+    OpenGLState prev_state = OpenGLState::GetCurState();
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    OpenGLState state;
+    state.scissor.enabled = true;
+    state.scissor.x = static_cast<GLint>(fill_rect.left);
+    state.scissor.y = static_cast<GLint>(fill_rect.bottom);
+    state.scissor.width = static_cast<GLsizei>(fill_rect.GetWidth());
+    state.scissor.height = static_cast<GLsizei>(fill_rect.GetHeight());
+
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    surface->InvalidateAllWatcher();
+
+    if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                               surface->texture.handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+
+        Pica::Texture::TextureInfo tex_info{};
+        tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(surface->pixel_format);
+        Common::Vec4<u8> color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info);
+
+        std::array<GLfloat, 4> color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f,
+                                               color.w / 255.f};
+
+        state.color_mask.red_enabled = GL_TRUE;
+        state.color_mask.green_enabled = GL_TRUE;
+        state.color_mask.blue_enabled = GL_TRUE;
+        state.color_mask.alpha_enabled = GL_TRUE;
+        state.Apply();
+        glClearBufferfv(GL_COLOR, 0, &color_values[0]);
+    } else if (surface->type == SurfaceType::Depth) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               surface->texture.handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        u32 value_32bit = 0;
+        GLfloat value_float;
+
+        if (surface->pixel_format == SurfaceParams::PixelFormat::D16) {
+            std::memcpy(&value_32bit, fill_data, 2);
+            value_float = value_32bit / 65535.0f; // 2^16 - 1
+        } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) {
+            std::memcpy(&value_32bit, fill_data, 3);
+            value_float = value_32bit / 16777215.0f; // 2^24 - 1
+        }
+
+        state.depth.write_mask = GL_TRUE;
+        state.Apply();
+        glClearBufferfv(GL_DEPTH, 0, &value_float);
+    } else if (surface->type == SurfaceType::DepthStencil) {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               surface->texture.handle, 0);
+
+        u32 value_32bit;
+        std::memcpy(&value_32bit, fill_data, sizeof(u32));
+
+        GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
+        GLint value_int = (value_32bit >> 24);
+
+        state.depth.write_mask = GL_TRUE;
+        state.stencil.write_mask = -1;
+        state.Apply();
+        glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
+    }
+    return true;
+}
+
+CachedSurface::~CachedSurface() {
+    if (texture.handle) {
+        auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8),
+                                              custom_tex_info.width, custom_tex_info.height}
+                             : HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(),
+                                              GetScaledHeight()};
+
+        owner.host_texture_recycler.emplace(tag, std::move(texture));
+    }
+}
+
+bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
+                            SurfaceInterval fill_interval) const {
+    if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
+        boost::icl::first(fill_interval) >= addr &&
+        boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
+        dest_surface.FromInterval(fill_interval).GetInterval() ==
+            fill_interval) { // make sure interval is a rectangle in dest surface
+        if (fill_size * 8 != dest_surface.GetFormatBpp()) {
+            // Check if bits repeat for our fill_size
+            const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
+            std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
+
+            for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
+                std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
+
+            for (u32 i = 0; i < fill_size; ++i)
+                if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
+                                dest_bytes_per_pixel) != 0)
+                    return false;
+
+            if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
+                return false;
+        }
+        return true;
+    }
+    return false;
+}
+
+bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
+                            SurfaceInterval copy_interval) const {
+    SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+    if (CanSubRect(subrect_params))
+        return true;
+
+    if (CanFill(dest_surface, copy_interval))
+        return true;
+
+    return false;
+}
+
+MICROPROFILE_DEFINE(Vulkan_CopySurface, "Vulkan", "CopySurface", MP_RGB(128, 192, 64));
+void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        SurfaceInterval copy_interval) {
+    MICROPROFILE_SCOPE(Vulkan_CopySurface);
+
+    SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+
+    ASSERT(src_surface != dst_surface);
+
+    // This is only called when CanCopy is true, no need to run checks here
+    if (src_surface->type == SurfaceType::Fill) {
+        // FillSurface needs a 4 bytes buffer
+        const u32 fill_offset =
+            (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
+        std::array<u8, 4> fill_buffer;
+
+        u32 fill_buff_pos = fill_offset;
+        for (int i : {0, 1, 2, 3})
+            fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size];
+
+        FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
+                    draw_framebuffer.handle);
+        return;
+    }
+    if (src_surface->CanSubRect(subrect_params)) {
+        BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
+                     dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
+                     src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
+        return;
+    }
+    UNREACHABLE();
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
+void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
+    ASSERT(type != SurfaceType::Fill);
+    const bool need_swap =
+        GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
+
+    const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
+    if (texture_src_data == nullptr)
+        return;
+
+    if (gl_buffer.empty()) {
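+        // Allocate the staging buffer lazily. GetGLBytesPerPixel can exceed the
+        // guest format's bpp (D24 is padded to 32 bits and tiled textures decode
+        // to RGBA8), so the buffer is sized for the host-side representation.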
+        gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format));
+    }
+
+    // TODO: Should probably be done in ::Memory:: and check for other regions too
+    if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
+        load_end = Memory::VRAM_VADDR_END;
+
+    if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
+        load_start = Memory::VRAM_VADDR;
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
+
+    ASSERT(load_start >= addr && load_end <= end);
+    const u32 start_offset = load_start - addr;
+
+    if (!is_tiled) {
+        ASSERT(type == SurfaceType::Color);
+        if (need_swap) {
+            // TODO(liushuyu): check if the byteswap here is 100% correct
+            // cannot fully test this
+            if (pixel_format == PixelFormat::RGBA8) {
+                for (std::size_t i = start_offset; i < load_end - addr; i += 4) {
+                    gl_buffer[i] = texture_src_data[i + 3];
+                    gl_buffer[i + 1] = texture_src_data[i + 2];
+                    gl_buffer[i + 2] = texture_src_data[i + 1];
+                    gl_buffer[i + 3] = texture_src_data[i];
+                }
+            } else if (pixel_format == PixelFormat::RGB8) {
+                for (std::size_t i = start_offset; i < load_end - addr; i += 3) {
+                    gl_buffer[i] = texture_src_data[i + 2];
+                    gl_buffer[i + 1] = texture_src_data[i + 1];
+                    gl_buffer[i + 2] = texture_src_data[i];
+                }
+            }
+        } else {
+            std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                        load_end - load_start);
+        }
+    } else {
+        if (type == SurfaceType::Texture) {
+            Pica::Texture::TextureInfo tex_info{};
+            tex_info.width = width;
+            tex_info.height = height;
+            tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(pixel_format);
+            tex_info.SetDefaultStride();
+            tex_info.physical_address = addr;
+
+            const SurfaceInterval load_interval(load_start, load_end);
+            const auto rect = GetSubRect(FromInterval(load_interval));
+            ASSERT(FromInterval(load_interval).GetInterval() == load_interval);
+
+            for (unsigned y = rect.bottom; y < rect.top; ++y) {
+                for (unsigned x = rect.left; x < rect.right; ++x) {
+                    auto vec4 =
+                        Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info);
+                    const std::size_t offset = (x + (width * y)) * 4;
+                    std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4);
+                }
+            }
+        } else {
+            morton_to_gpu_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
+                                                                      addr, load_start, load_end);
+        }
+    }
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
+void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
+    u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
+    if (dst_buffer == nullptr)
+        return;
+
+    ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format));
+
+    // TODO: Should probably be done in ::Memory:: and check for other regions too
+    // same as loadglbuffer()
+    if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
+        flush_end = Memory::VRAM_VADDR_END;
+
+    if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
+        flush_start = Memory::VRAM_VADDR;
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
+
+    ASSERT(flush_start >= addr && flush_end <= end);
+    const u32 start_offset = flush_start - addr;
+    const u32 end_offset = flush_end - addr;
+
+    if (type == SurfaceType::Fill) {
+        const u32 coarse_start_offset = start_offset - (start_offset % fill_size);
+        const u32 backup_bytes = start_offset % fill_size;
+        std::array<u8, 4> backup_data;
+        if (backup_bytes)
+            std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
+
+        for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
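+            // Stamp the fill pattern across the flushed range; the last copy is
+            // clamped by std::min so a partial pattern never overruns the range.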
+            std::memcpy(&dst_buffer[offset], &fill_data[0],
+                        std::min(fill_size, end_offset - offset));
+        }
+
+        if (backup_bytes)
+            std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
+    } else if (!is_tiled) {
+        ASSERT(type == SurfaceType::Color);
+        if (pixel_format == PixelFormat::RGBA8 && GLES) {
+            for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
+                dst_buffer[i] = gl_buffer[i + 3];
+                dst_buffer[i + 1] = gl_buffer[i + 2];
+                dst_buffer[i + 2] = gl_buffer[i + 1];
+                dst_buffer[i + 3] = gl_buffer[i];
+            }
+        } else if (pixel_format == PixelFormat::RGB8 && GLES) {
+            for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
+                dst_buffer[i] = gl_buffer[i + 2];
+                dst_buffer[i + 1] = gl_buffer[i + 1];
+                dst_buffer[i + 2] = gl_buffer[i];
+            }
+        } else {
+            std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
+                        flush_end - flush_start);
+        }
+    } else {
+        gpu_to_morton_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
+                                                                  addr, flush_start, flush_end);
+    }
+}
+
+bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
+    auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
+    const auto& image_interface = Core::System::GetInstance().GetImageInterface();
+
+    if (custom_tex_cache.IsTextureCached(tex_hash)) {
+        custom_tex_info = custom_tex_cache.LookupTexture(tex_hash);
+        return true;
+    }
+
+    if (!custom_tex_cache.CustomTextureExists(tex_hash)) {
+        return false;
+    }
+
+    const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash);
+    if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width,
+                                    custom_tex_info.height, path_info.path)) {
+        LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path);
+        return false;
+    }
+
+    const std::bitset<32> width_bits(custom_tex_info.width);
+    const std::bitset<32> height_bits(custom_tex_info.height);
+    if (width_bits.count() != 1 || height_bits.count() != 1) {
+        LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path);
+        return false;
+    }
+
+    LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path);
+    Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height);
+    custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width,
+                                  custom_tex_info.height);
+    return true;
+}
+
+void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
+    // Make sure the texture size is a power of 2
+    // If not, the surface is actually a framebuffer
+    std::bitset<32> width_bits(width);
+    std::bitset<32> height_bits(height);
+    if (width_bits.count() != 1 || height_bits.count() != 1) {
+        LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})",
+                    tex_hash, width, height);
+        return;
+    }
+
+    // Dump texture to RGBA8 and encode as PNG
+    const auto& image_interface = Core::System::GetInstance().GetImageInterface();
+    auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
+    std::string dump_path =
+        fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir),
+                    Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id);
+    if (!FileUtil::CreateFullPath(dump_path)) {
+        LOG_ERROR(Render, "Unable to create {}", dump_path);
+        return;
+    }
+
+    dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format);
+    if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) {
+        custom_tex_cache.SetTextureDumped(tex_hash);
+
+        LOG_INFO(Render_OpenGL,
"Dumping texture to {}", dump_path); + std::vector decoded_texture; + decoded_texture.resize(width * height * 4); + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_texture = state.texture_units[0].texture_2d; + state.Apply(); + /* + GetTexImageOES is used even if not using OpenGL ES to work around a small issue that + happens if using custom textures with texture dumping at the same. + Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a + higher quality 256x256 texture. If the 256x256 texture is displayed first and the + 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture + will appear in the corner of the 256x256 texture. If texture dumping is enabled and + the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL + texture is still 256x256, Citra crashes because it thinks the texture is only 32x32. + GetTexImageOES conveniently only dumps the specified region, and works on both + desktop and ES. + */ + // if the backend isn't OpenGL ES, this won't be initialized yet + if (!owner.texture_downloader_es) + owner.texture_downloader_es = std::make_unique(false); + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, + height, width, &decoded_texture[0]); + state.texture_units[0].texture_2d = old_texture; + state.Apply(); + Common::FlipRGBA8Texture(decoded_texture, width, height); + if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) + LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); + } +} + +MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_handle, + GLuint draw_fb_handle) { + if (type == SurfaceType::Fill) + return; + + MICROPROFILE_SCOPE(OpenGL_TextureUL); + + ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format)); + + u64 tex_hash = 0; + + if (Settings::values.dump_textures || Settings::values.custom_textures) { + tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size()); + } + + if (Settings::values.custom_textures) { + is_custom = LoadCustomTexture(tex_hash); + } + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + + const FormatTuple& tuple = GetFormatTuple(pixel_format); + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in + // surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + if (is_custom) { + unscaled_tex = owner.AllocateSurfaceTexture( + GetFormatTuple(PixelFormat::RGBA8), custom_tex_info.width, custom_tex_info.height); + } else { + unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight()); + } + target_tex = unscaled_tex.handle; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = target_tex; + cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); + if (is_custom) { + if (res_scale == 1) { + texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8), + custom_tex_info.width, custom_tex_info.height); + cur_state.texture_units[0].texture_2d = texture.handle; + 
+            cur_state.Apply();
+        }
+        // always going to be using rgba8
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(custom_tex_info.width));
+
+        glActiveTexture(GL_TEXTURE0);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, custom_tex_info.width, custom_tex_info.height,
+                        GL_RGBA, GL_UNSIGNED_BYTE, custom_tex_info.tex.data());
+    } else {
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
+
+        glActiveTexture(GL_TEXTURE0);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+                        static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                        &gl_buffer[buffer_offset]);
+    }
+
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+    if (Settings::values.dump_textures && !is_custom)
+        DumpTexture(target_tex, tex_hash);
+
+    cur_state.texture_units[0].texture_2d = old_tex;
+    cur_state.Apply();
+
+    if (res_scale != 1) {
+        auto scaled_rect = rect;
+        scaled_rect.left *= res_scale;
+        scaled_rect.top *= res_scale;
+        scaled_rect.right *= res_scale;
+        scaled_rect.bottom *= res_scale;
+        auto from_rect =
+            is_custom ? Common::Rectangle<u32>{0, custom_tex_info.height, custom_tex_info.width, 0}
+                      : Common::Rectangle<u32>{0, rect.GetHeight(), rect.GetWidth(), 0};
+        if (!owner.texture_filterer->Filter(unscaled_tex.handle, from_rect, texture.handle,
+                                            scaled_rect, type, read_fb_handle, draw_fb_handle)) {
+            BlitTextures(unscaled_tex.handle, from_rect, texture.handle, scaled_rect, type,
+                         read_fb_handle, draw_fb_handle);
+        }
+    }
+
+    InvalidateAllWatcher();
+}
+
+MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
+void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
+                                      GLuint draw_fb_handle) {
+    if (type == SurfaceType::Fill) {
+        return;
+    }
+
+    MICROPROFILE_SCOPE(OpenGL_TextureDL);
+
+    if (gl_buffer.empty()) {
+        gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format));
+    }
+
+    OpenGLState state = OpenGLState::GetCurState();
+    OpenGLState prev_state = state;
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+
+    // Ensure no bad interactions with GL_PACK_ALIGNMENT
+    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
+    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
+    std::size_t buffer_offset =
+        (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
+
+    // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
+    if (res_scale != 1) {
+        auto scaled_rect = rect;
+        scaled_rect.left *= res_scale;
+        scaled_rect.top *= res_scale;
+        scaled_rect.right *= res_scale;
+        scaled_rect.bottom *= res_scale;
+
+        Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
+        OGLTexture unscaled_tex =
+            owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
+        BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
+                     read_fb_handle, draw_fb_handle);
+
+        state.texture_units[0].texture_2d = unscaled_tex.handle;
+        state.Apply();
+
+        glActiveTexture(GL_TEXTURE0);
+        if (GLES) {
+            owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
+                                                     rect.GetHeight(), rect.GetWidth(),
+                                                     &gl_buffer[buffer_offset]);
+        } else {
+            glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+        }
+    } else {
+        state.ResetTexture(texture.handle);
+        state.draw.read_framebuffer = read_fb_handle;
+        state.Apply();
+
+        if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        } else if (type == SurfaceType::Depth) {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        } else {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+        }
+        switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) {
+        case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
+            break;
+        case GL_FRAMEBUFFER_UNSUPPORTED:
+            LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
+            break;
+        }
+        glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
+                     static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
+                     tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+    }
+
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+}
+
+enum MatchFlags {
+    Invalid = 1,      // Flag that can be applied to other match types, invalid matches require
+                      // validation before they can be used
+    Exact = 1 << 1,   // Surfaces perfectly match
+    SubRect = 1 << 2, // Surface encompasses params
+    Copy = 1 << 3,    // Surface we can copy from
+    Expand = 1 << 4,  // Surface that can expand params
+    TexCopy = 1 << 5  // Surface that will match a display transfer "texture copy" parameters
+};
+
+static constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
+    return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+
+/// Get the best surface match (and its match type) for the given flags
+template <MatchFlags find_flags>
+static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
+                         ScaleMatch match_scale_type,
+                         std::optional<SurfaceInterval> validate_interval = std::nullopt) {
+    Surface match_surface = nullptr;
+    bool match_valid = false;
+    u32 match_scale = 0;
+    SurfaceInterval match_interval{};
+
+    for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
+        for (const auto& surface : pair.second) {
+            const bool res_scale_matched = match_scale_type == ScaleMatch::Exact
+                                               ? (params.res_scale == surface->res_scale)
+                                               : (params.res_scale <= surface->res_scale);
+            // validity will be checked in GetCopyableInterval
+            bool is_valid =
true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (!(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (!(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + auto copy_interval = + params.FromInterval(*validate_interval).GetCopyableInterval(surface); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + return match_surface; +} + +RasterizerCacheOpenGL::RasterizerCacheOpenGL() { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + texture_filterer = std::make_unique(Settings::values.texture_filter_name, + resolution_scale_factor); + format_reinterpreter = std::make_unique(); + if (GLES) + texture_downloader_es = std::make_unique(false); + + read_framebuffer.Create(); + draw_framebuffer.Create(); +} + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { +#ifndef ANDROID + // This is for switching renderers, which is unsupported on Android, and costly on shutdown + ClearAll(false); +#endif +} + +MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); +bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, + const Common::Rectangle& src_rect, + const Surface& dst_surface, + const Common::Rectangle& dst_rect) { + MICROPROFILE_SCOPE(OpenGL_BlitSurface); + + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) + return false; + + dst_surface->InvalidateAllWatcher(); + + return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, + dst_rect, src_surface->type, read_framebuffer.handle, + draw_framebuffer.handle); +} + +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + + 
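+    // Tiled PICA surfaces are stored as 8x8 pixel blocks, so tiled dimensions
+    // must be block-aligned for the address arithmetic to hold.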
ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); + + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find + // it to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } + } + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, + ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, Common::Rectangle{}); + } + + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, aligned_params, + match_res_scale); + if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + + SurfaceParams new_params = *surface; + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = std::max(aligned_params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); + + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted + remove_surfaces.emplace(surface); + + surface = new_surface; + 
RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); + } + + return std::make_tuple(surface, surface->GetScaledSubRect(params)); +} + +Surface RasterizerCacheOpenGL::GetTextureSurface( + const Pica::TexturingRegs::FullTextureConfig& config) { + Pica::Texture::TextureInfo info = + Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + return GetTextureSurface(info, config.config.lod.max_level); +} + +Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInfo& info, + u32 max_level) { + if (info.physical_address == 0) { + return nullptr; + } + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor; + params.UpdateParams(); + + u32 min_width = info.width >> max_level; + u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, + min_height); + return nullptr; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(Render_OpenGL, + "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return nullptr; + } + + auto surface = GetSurface(params, ScaleMatch::Ignore, true); + if (!surface) + return nullptr; + + // Update mipmap if necessary + if (max_level != 0) { + if (max_level >= 8) { + // since PICA only supports texture size between 8 and 1024, there are at most eight + // possible mipmap levels including the base. 
+ LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + return nullptr; + } + OpenGLState prev_state = OpenGLState::GetCurState(); + OpenGLState state; + SCOPE_EXIT({ prev_state.Apply(); }); + auto format_tuple = GetFormatTuple(params.pixel_format); + + // Allocate more mipmap level if necessary + if (surface->max_level < max_level) { + state.texture_units[0].texture_2d = surface->texture.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); + u32 width; + u32 height; + if (surface->is_custom) { + width = surface->custom_tex_info.width; + height = surface->custom_tex_info.height; + } else { + width = surface->GetScaledWidth(); + height = surface->GetScaledHeight(); + } + // If we are using ARB_texture_storage then we've already allocated all of the mipmap + // levels + if (!GL_ARB_texture_storage) { + for (u32 level = surface->max_level + 1; level <= max_level; ++level) { + glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level, + height >> level, 0, format_tuple.format, format_tuple.type, + nullptr); + } + } + if (surface->is_custom || !texture_filterer->IsNull()) { + // TODO: proper mipmap support for custom textures + glGenerateMipmap(GL_TEXTURE_2D); + } + surface->max_level = max_level; + } + + // Blit mipmaps that have been invalidated + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.ResetTexture(surface->texture.handle); + SurfaceParams surface_params = *surface; + for (u32 level = 1; level <= max_level; ++level) { + // In PICA all mipmap levels are stored next to each other + surface_params.addr += + surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; + surface_params.width /= 2; + surface_params.height /= 2; + surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.UpdateParams(); + auto& watcher = surface->level_watchers[level - 1]; + if (!watcher || !watcher->Get()) { + auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); + if (level_surface) { + watcher = level_surface->CreateWatcher(); + } else { + watcher = nullptr; + } + } + + if (watcher && !watcher->IsValid()) { + auto level_surface = watcher->Get(); + if (!level_surface->invalid_regions.empty()) { + ValidateSurface(level_surface, level_surface->addr, level_surface->size); + } + state.ResetTexture(level_surface->texture.handle); + state.Apply(); + if (!surface->is_custom && texture_filterer->IsNull()) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + level_surface->texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + auto src_rect = level_surface->GetScaledRect(); + auto dst_rect = surface_params.GetScaledRect(); + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, + dst_rect.left, dst_rect.bottom, dst_rect.right, dst_rect.top, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + } + watcher->Validate(); + } + } + } + + return surface; +} + +const CachedTextureCube& RasterizerCacheOpenGL::GetTextureCube(const TextureCubeConfig& config) { + auto& cube = texture_cube_cache[config]; + + struct Face { + 
Face(std::shared_ptr& watcher, PAddr address, GLenum gl_face) + : watcher(watcher), address(address), gl_face(gl_face) {} + std::shared_ptr& watcher; + PAddr address; + GLenum gl_face; + }; + + const std::array faces{{ + {cube.px, config.px, GL_TEXTURE_CUBE_MAP_POSITIVE_X}, + {cube.nx, config.nx, GL_TEXTURE_CUBE_MAP_NEGATIVE_X}, + {cube.py, config.py, GL_TEXTURE_CUBE_MAP_POSITIVE_Y}, + {cube.ny, config.ny, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y}, + {cube.pz, config.pz, GL_TEXTURE_CUBE_MAP_POSITIVE_Z}, + {cube.nz, config.nz, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z}, + }}; + + for (const Face& face : faces) { + if (!face.watcher || !face.watcher->Get()) { + Pica::Texture::TextureInfo info; + info.physical_address = face.address; + info.height = info.width = config.width; + info.format = config.format; + info.SetDefaultStride(); + auto surface = GetTextureSurface(info); + if (surface) { + face.watcher = surface->CreateWatcher(); + } else { + // Can occur when texture address is invalid. We mark the watcher with nullptr + // in this case and the content of the face wouldn't get updated. These are + // usually leftover setup in the texture unit and games are not supposed to draw + // using them. + face.watcher = nullptr; + } + } + } + + if (cube.texture.handle == 0) { + for (const Face& face : faces) { + if (face.watcher) { + auto surface = face.watcher->Get(); + cube.res_scale = std::max(cube.res_scale, surface->res_scale); + } + } + + cube.texture.Create(); + AllocateTextureCube( + cube.texture.handle, + GetFormatTuple(CachedSurface::PixelFormatFromTextureFormat(config.format)), + cube.res_scale * config.width); + } + + u32 scaled_size = cube.res_scale * config.width; + + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_framebuffer.handle; + state.draw.draw_framebuffer = draw_framebuffer.handle; + state.ResetTexture(cube.texture.handle); + + for (const Face& face : faces) { + if (face.watcher && !face.watcher->IsValid()) { + auto surface = face.watcher->Get(); + if (!surface->invalid_regions.empty()) { + ValidateSurface(surface, surface->addr, surface->size); + } + state.ResetTexture(surface->texture.handle); + state.Apply(); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + surface->texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, face.gl_face, + cube.texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + + auto src_rect = surface->GetScaledRect(); + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, 0, 0, + scaled_size, scaled_size, GL_COLOR_BUFFER_BIT, GL_LINEAR); + face.watcher->Validate(); + } + } + + return cube; +} + +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( + bool using_color_fb, bool using_depth_fb, const Common::Rectangle& viewport_rect) { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // update resolution_scale_factor and reset cache if changed + if ((resolution_scale_factor != VideoCore::GetResolutionScaleFactor()) | + (VideoCore::g_texture_filter_update_requested.exchange(false) && + texture_filterer->Reset(Settings::values.texture_filter_name, resolution_scale_factor))) { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + 
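+        // Either the scale factor or the texture filter changed, which invalidates
+        // every cached surface: flush pending data and rebuild the cache from scratch.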
FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + texture_cube_cache.clear(); + } + + Common::Rectangle viewport_clamped{ + static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + Common::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + Common::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + Common::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + color_surface->InvalidateAllWatcher(); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + depth_surface->InvalidateAllWatcher(); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); +} + +Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(*this); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + 
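+    // A fill surface has no texel data of its own; giving it the largest possible
+    // res_scale means the `params.res_scale <= surface->res_scale` comparison in
+    // FindMatch never rejects it on scale grounds.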
+    new_surface->res_scale = std::numeric_limits<u16>::max();
+
+    std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4);
+    if (config.fill_32bit) {
+        new_surface->fill_size = 4;
+    } else if (config.fill_24bit) {
+        new_surface->fill_size = 3;
+    } else {
+        new_surface->fill_size = 2;
+    }
+
+    RegisterSurface(new_surface);
+    return new_surface;
+}
+
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
+    Common::Rectangle<u32> rect{};
+
+    Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
+        surface_cache, params, ScaleMatch::Ignore);
+
+    if (match_surface != nullptr) {
+        ValidateSurface(match_surface, params.addr, params.size);
+
+        SurfaceParams match_subrect;
+        if (params.width != params.stride) {
+            const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
+            match_subrect = params;
+            match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size;
+            match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size;
+            match_subrect.height *= tiled_size;
+        } else {
+            match_subrect = match_surface->FromInterval(params.GetInterval());
+            ASSERT(match_subrect.GetInterval() == params.GetInterval());
+        }
+
+        rect = match_surface->GetScaledSubRect(match_subrect);
+    }
+
+    return std::make_tuple(match_surface, rect);
+}
+
+void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
+                                             const Surface& dest_surface) {
+    ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
+
+    BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
+                 dest_surface->GetScaledSubRect(*src_surface));
+
+    dest_surface->invalid_regions -= src_surface->GetInterval();
+    dest_surface->invalid_regions += src_surface->invalid_regions;
+
+    SurfaceRegions regions;
+    for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
+        if (pair.second == src_surface) {
+            regions += pair.first;
+        }
+    }
+    for (const auto& interval : regions) {
+        dirty_regions.set({interval, dest_surface});
+    }
+}
+
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) {
+    if (size == 0)
+        return;
+
+    const SurfaceInterval validate_interval(addr, addr + size);
+
+    if (surface->type == SurfaceType::Fill) {
+        // Sanity check, fill surfaces will always be valid when used
+        ASSERT(surface->IsRegionValid(validate_interval));
+        return;
+    }
+
+    auto validate_regions = surface->invalid_regions & validate_interval;
+    auto notify_validated = [&](SurfaceInterval interval) {
+        surface->invalid_regions.erase(interval);
+        validate_regions.erase(interval);
+    };
+
+    while (true) {
+        const auto it = validate_regions.begin();
+        if (it == validate_regions.end())
+            break;
+
+        const auto interval = *it & validate_interval;
+        // Look for a valid surface to copy from
+        SurfaceParams params = surface->FromInterval(interval);
+
+        Surface copy_surface =
+            FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
+        if (copy_surface != nullptr) {
+            SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
+            CopySurface(copy_surface, surface, copy_interval);
+            notify_validated(copy_interval);
+            continue;
+        }
+
+        // Try to find a surface in the cache with a different format
+        // that can be reinterpreted to the requested format.
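+        // (e.g. a d24s8 depth buffer that is later read as an rgba8 color target)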
+ if (ValidateByReinterpretation(surface, params, interval)) { + notify_validated(interval); + continue; + } + // Could not find a matching reinterpreter, check if we need to implement a + // reinterpreter + if (NoUnimplementedReinterpretations(surface, params, interval) && + !IntervalHasInvalidPixelFormat(params, interval)) { + // No surfaces were found in the cache that had a matching bit-width. + // If the region was created entirely on the GPU, + // assume it was a developer mistake and skip flushing. + if (boost::icl::contains(dirty_regions, interval)) { + LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " + "invalid. Skipping validation"); + validate_regions.erase(interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadGLBuffer(params.addr, params.end); + surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); + notify_validated(params.GetInterval()); + } +} + +bool RasterizerCacheOpenGL::NoUnimplementedReinterpretations(const Surface& surface, + SurfaceParams& params, + const SurfaceInterval& interval) { + static constexpr std::array all_formats{ + PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, + PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, + PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, + PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, + PixelFormat::D24S8, + }; + bool implemented = true; + for (PixelFormat format : all_formats) { + if (SurfaceParams::GetFormatBpp(format) == surface->GetFormatBpp()) { + params.pixel_format = format; + // This could potentially be expensive, + // although experimentally it hasn't been too bad + Surface test_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (test_surface != nullptr) { + LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", + SurfaceParams::PixelFormatAsString(format), + SurfaceParams::PixelFormatAsString(surface->pixel_format)); + implemented = false; + } + } + } + return implemented; +} + +bool RasterizerCacheOpenGL::IntervalHasInvalidPixelFormat(SurfaceParams& params, + const SurfaceInterval& interval) { + params.pixel_format = PixelFormat::Invalid; + for (const auto& set : RangeFromInterval(surface_cache, interval)) + for (const auto& surface : set.second) + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_WARNING(Render_OpenGL, "Surface found with invalid pixel format"); + return true; + } + return false; +} + +bool RasterizerCacheOpenGL::ValidateByReinterpretation(const Surface& surface, + SurfaceParams& params, + const SurfaceInterval& interval) { + auto [cvt_begin, cvt_end] = + format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format); + for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) { + PixelFormat format = reinterpreter->first.src_format; + params.pixel_format = format; + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + + if (reinterpret_surface != nullptr) { + SurfaceInterval reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + SurfaceParams reinterpret_params = surface->FromInterval(reinterpret_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); + auto dest_rect = surface->GetScaledSubRect(reinterpret_params); + + if (!texture_filterer->IsNull() && 
reinterpret_surface->res_scale == 1 && + surface->res_scale == resolution_scale_factor) { + // The destination surface is either a framebuffer, or a filtered texture. + // Create an intermediate surface to convert to before blitting to the + // destination. + Common::Rectangle tmp_rect{0, dest_rect.GetHeight() / resolution_scale_factor, + dest_rect.GetWidth() / resolution_scale_factor, 0}; + OGLTexture tmp_tex = AllocateSurfaceTexture( + GetFormatTuple(reinterpreter->first.dst_format), tmp_rect.right, tmp_rect.top); + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, tmp_tex.handle, + tmp_rect, draw_framebuffer.handle); + SurfaceParams::SurfaceType type = + SurfaceParams::GetFormatType(reinterpreter->first.dst_format); + + if (!texture_filterer->Filter(tmp_tex.handle, tmp_rect, surface->texture.handle, + dest_rect, type, read_framebuffer.handle, + draw_framebuffer.handle)) { + BlitTextures(tmp_tex.handle, tmp_rect, surface->texture.handle, dest_rect, type, + read_framebuffer.handle, draw_framebuffer.handle); + } + } else { + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, surface->texture.handle, + dest_rect, draw_framebuffer.handle); + } + return true; + } + } + return false; +} + +void RasterizerCacheOpenGL::ClearAll(bool flush) { + const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); + // Force flush all surfaces from the cache + if (flush) { + FlushRegion(0x0, 0xFFFFFFFF); + } + // Unmark all of the marked pages + for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { + const auto interval = pair.first & flush_interval; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } + + // Remove the whole cache without really looking at it. + cached_pages -= flush_interval; + dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); + surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); + remove_surfaces.clear(); +} + +void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? 
pair.first : pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, + draw_framebuffer.handle); + } + surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + flushed_intervals += interval; + } + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + +void RasterizerCacheOpenGL::FlushAll() { + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval invalid_interval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); + region_owner->invalid_regions.erase(invalid_interval); + } + + for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (const auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + cached_surface->InvalidateAllWatcher(); + + // If the surface has no salvageable data it should be removed from the cache to avoid + // clogging the data structure + if (cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } + } + } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (const auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(*this); + static_cast(*surface) = params; + + surface->invalid_regions.insert(surface->GetInterval()); + + surface->texture = + AllocateSurfaceTexture(GetFormatTuple(surface->pixel_format), surface->GetScaledWidth(), + surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + std::lock_guard 
lock{mutex}; + + if (!surface->registered) { + return; + } + surface->registered = false; + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + true); + else if (delta < 0 && count == -delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h new file mode 100644 index 000000000..de44d1f29 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h @@ -0,0 +1,371 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
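+//
+// Surfaces are tracked with boost::icl interval maps keyed by emulated physical
+// address, so overlap queries (e.g. "which cached surfaces intersect a flushed
+// range?") are interval lookups instead of scans over every cached surface.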
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-local-typedefs" +#endif +#include +#include +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#include +#include +#include +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/math_util.h" +#include "core/custom_tex_cache.h" +#include "video_core/renderer_vulkan/vk_surface_params.h" +#include "video_core/renderer_vulkan/vk_texture.h" +#include "video_core/texture/texture_decode.h" + +namespace Vulkan { + +class RasterizerCacheVulkan; +class TextureFilterer; +class FormatReinterpreterVulkan; + +const vk::Format& GetFormatTuple(SurfaceParams::PixelFormat pixel_format); + +struct HostTextureTag { + vk::Format format; + u32 width; + u32 height; + bool operator==(const HostTextureTag& rhs) const noexcept { + return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height); + }; +}; + +struct TextureCubeConfig { + PAddr px; + PAddr nx; + PAddr py; + PAddr ny; + PAddr pz; + PAddr nz; + u32 width; + Pica::TexturingRegs::TextureFormat format; + + bool operator==(const TextureCubeConfig& rhs) const { + return std::tie(px, nx, py, ny, pz, nz, width, format) == + std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format); + } + + bool operator!=(const TextureCubeConfig& rhs) const { + return !(*this == rhs); + } +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::HostTextureTag& tag) const noexcept { + std::size_t hash = 0; + boost::hash_combine(hash, tag.format); + boost::hash_combine(hash, tag.width); + boost::hash_combine(hash, tag.height); + return hash; + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::TextureCubeConfig& config) const noexcept { + std::size_t hash = 0; + boost::hash_combine(hash, config.px); + boost::hash_combine(hash, config.nx); + boost::hash_combine(hash, config.py); + boost::hash_combine(hash, config.ny); + boost::hash_combine(hash, config.pz); + boost::hash_combine(hash, config.nz); + boost::hash_combine(hash, config.width); + boost::hash_combine(hash, static_cast(config.format)); + return hash; + } +}; +} // namespace std + +namespace Vulkan { + +using SurfaceSet = std::set; + +using SurfaceRegions = boost::icl::interval_set; +using SurfaceMap = + boost::icl::interval_map; +using SurfaceCache = + boost::icl::interval_map; + +static_assert(std::is_same() && + std::is_same(), + "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +/** + * A watcher that notifies whether a cached surface has been changed. This is useful for caching + * surface collection objects, including texture cube and mipmap. + */ +struct SurfaceWatcher { +public: + explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} + + /** + * Checks whether the surface has been changed. 
+ * @return false if the surface content has been changed since last Validate() call or has been + * destroyed; otherwise true + */ + bool IsValid() const { + return !surface.expired() && valid; + } + + /// Marks that the content of the referencing surface has been updated to the watcher user. + void Validate() { + ASSERT(!surface.expired()); + valid = true; + } + + /// Gets the referencing surface. Returns null if the surface has been destroyed + Surface Get() const { + return surface.lock(); + } + +private: + friend struct CachedSurface; + std::weak_ptr surface; + bool valid = false; +}; + +class RasterizerCacheVulkan; + +struct CachedSurface : SurfaceParams, std::enable_shared_from_this { + CachedSurface(RasterizerCacheVulkan& owner) : owner{owner} {} + ~CachedSurface(); + + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsSurfaceFullyInvalid() const { + auto interval = GetInterval(); + return *invalid_regions.equal_range(interval).first == interval; + } + + bool registered = false; + SurfaceRegions invalid_regions; + + u32 fill_size = 0; /// Number of bytes to read from fill_data + std::array fill_data; + + VKTexture texture; + + /// max mipmap level that has been attached to the texture + u32 max_level = 0; + /// level_watchers[i] watches the (i+1)-th level mipmap source surface + std::array, 7> level_watchers; + + bool is_custom = false; + Core::CustomTexInfo custom_tex_info; + + static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { + return format == PixelFormat::Invalid + ? 0 + : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) + ? 
4 + : SurfaceParams::GetFormatBpp(format) / 8; + } + + std::vector vk_buffer; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Custom texture loading and dumping + bool LoadCustomTexture(u64 tex_hash); + void DumpTexture(VKTexture& target_tex, u64 tex_hash); + + // Upload/Download data in vk_buffer in/to this surface's texture + void UploadGLTexture(Common::Rectangle rect, GLuint read_fb_handle, GLuint draw_fb_handle); + void DownloadGLTexture(const Common::Rectangle& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); + + std::shared_ptr CreateWatcher() { + auto watcher = std::make_shared(weak_from_this()); + watchers.push_front(watcher); + return watcher; + } + + void InvalidateAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + } + } + } + + void UnlinkAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + locked->surface.reset(); + } + } + watchers.clear(); + } + +private: + RasterizerCacheVulkan& owner; + std::list> watchers; +}; + +struct CachedTextureCube { + VKTexture texture; + u16 res_scale = 1; + std::shared_ptr px; + std::shared_ptr nx; + std::shared_ptr py; + std::shared_ptr ny; + std::shared_ptr pz; + std::shared_ptr nz; +}; + +class TextureDownloader; + +class RasterizerCacheVulkan : NonCopyable { +public: + RasterizerCacheVulkan(); + ~RasterizerCacheVulkan(); + + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, + const Surface& dst_surface, const Common::Rectangle& dst_rect); + + /// Copy one surface's region to another + void CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval); + + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from + /// 3DS memory to OpenGL and caches it (if not already cached) + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); + + /// Get a texture cube based on the texture configuration + const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config); + + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const Common::Rectangle& viewport_rect); + + /// Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); + + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); + + /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); + + /// Flush 
all cached resources tracked by this cache manager
+    void FlushAll();
+
+    /// Clear all cached resources tracked by this cache manager
+    void ClearAll(bool flush);
+
+    // Textures from destroyed surfaces are stored here to be recycled, reducing
+    // allocation overhead in the driver. This must be declared above surface_cache
+    // to ensure all cached surfaces are destroyed before the recycler is.
+    std::unordered_multimap<HostTextureTag, VKTexture> host_texture_recycler;
+
+private:
+    void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
+
+    /// Update surface's texture for given region when necessary
+    void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
+
+    // Returns false if there is a surface in the cache at the interval with the same bit-width
+    // that would require an unimplemented reinterpretation
+    bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
+                                          Vulkan::SurfaceParams& params,
+                                          const Vulkan::SurfaceInterval& interval);
+
+    // Return true if a surface with an invalid pixel format exists at the interval
+    bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
+
+    // Attempt to find a reinterpretable surface in the cache and use it to copy for validation
+    bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
+                                    const SurfaceInterval& interval);
+
+    /// Create a new surface
+    Surface CreateSurface(const SurfaceParams& params);
+
+    /// Register surface into the cache
+    void RegisterSurface(const Surface& surface);
+
+    /// Remove surface from the cache
+    void UnregisterSurface(const Surface& surface);
+
+    /// Increase/decrease the number of surfaces in pages touching the specified region
+    void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
+
+    SurfaceCache surface_cache;
+    PageMap cached_pages;
+    SurfaceMap dirty_regions;
+    SurfaceSet remove_surfaces;
+
+    OGLFramebuffer read_framebuffer;
+    OGLFramebuffer draw_framebuffer;
+
+    u16 resolution_scale_factor;
+
+    std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
+
+    std::recursive_mutex mutex;
+
+public:
+    VKTexture AllocateSurfaceTexture(vk::Format format, u32 width, u32 height);
+
+    std::unique_ptr<TextureFilterer> texture_filterer;
+    std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
+    std::unique_ptr<TextureDownloader> texture_downloader_es;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.cpp b/src/video_core/renderer_vulkan/vk_resource_cache.cpp
new file mode 100644
index 000000000..07b8035f1
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_cache.cpp
@@ -0,0 +1,164 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
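+//
+// VKResourceCache centralizes long-lived, shareable Vulkan objects: descriptor
+// set layouts, the common pipeline layout, samplers, render passes and the
+// pipeline cache, each memoized behind a lookup keyed on its description.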
+ +#include "video_core/renderer_vulkan/vk_resource_cache.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include +#include +#include + +namespace Vulkan { + +VKResourceCache::~VKResourceCache() +{ + for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) { + g_vk_instace->GetDevice().destroyDescriptorSetLayout(descriptor_layouts[i]); + } +} + +bool VKResourceCache::Initialize() +{ + // Define the descriptor sets we will be using + std::array ubo_set = {{ + { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | vk::ShaderStageFlagBits::eFragment }, // shader_data + { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex } // pica_uniforms + }}; + + std::array texture_set = {{ + { 0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex0 + { 1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex1 + { 2, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex2 + { 3, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex_cube + }}; + + std::array lut_set = {{ + { 0, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_lf + { 1, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_rg + { 2, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment } // texture_buffer_lut_rgba + }}; + + // Create and store descriptor set layouts + std::array create_infos = {{ + { vk::DescriptorSetLayoutCreateFlags(), ubo_set }, + { vk::DescriptorSetLayoutCreateFlags(), texture_set }, + { vk::DescriptorSetLayoutCreateFlags(), lut_set } + }}; + + for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) { + descriptor_layouts[i] = g_vk_instace->GetDevice().createDescriptorSetLayout(create_infos[i]); + } + + // Create the standard descriptor set layout + vk::PipelineLayoutCreateInfo layout_info({}, descriptor_layouts); + pipeline_layout = g_vk_instace->GetDevice().createPipelineLayoutUnique(layout_info); + + if (!CreateStaticSamplers()) + return false; + + // Create global texture staging buffer + texture_upload_buffer.Create(MAX_TEXTURE_UPLOAD_BUFFER_SIZE, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, + vk::BufferUsageFlagBits::eTransferSrc); + + return true; +} + +vk::Sampler VKResourceCache::GetSampler(const SamplerInfo& info) +{ + auto iter = sampler_cache.find(info); + if (iter != sampler_cache.end()) { + return iter->second; + } + + // Create texture sampler + auto properties = g_vk_instace->GetPhysicalDevice().getProperties(); + auto features = g_vk_instace->GetPhysicalDevice().getFeatures(); + vk::SamplerCreateInfo sampler_info + ( + {}, + info.mag_filter, + info.min_filter, + info.mipmap_mode, + info.wrapping[0], info.wrapping[1], info.wrapping[2], + {}, + features.samplerAnisotropy, + properties.limits.maxSamplerAnisotropy, + false, + vk::CompareOp::eAlways, + {}, + {}, + vk::BorderColor::eFloatTransparentBlack, + false + ); + + auto sampler = g_vk_instace->GetDevice().createSamplerUnique(sampler_info); + vk::Sampler handle = sampler.get(); + + // Store it even if it failed + sampler_cache.emplace(info, std::move(sampler)); + return handle; +} + +vk::RenderPass VKResourceCache::GetRenderPass(vk::Format color_format, vk::Format depth_format, + u32 multisamples, vk::AttachmentLoadOp load_op) +{ + auto key = std::tie(color_format, depth_format, 
multisamples, load_op); + auto it = render_pass_cache.find(key); + if (it != render_pass_cache.end()) { + return it->second; + } + + vk::SubpassDescription subpass({}, vk::PipelineBindPoint::eGraphics); + std::array attachments; + std::array references; + u32 index = 0; + + if (color_format != vk::Format::eUndefined) { + references[index] = vk::AttachmentReference{index, vk::ImageLayout::eColorAttachmentOptimal}; + attachments[index] = + { + {}, + color_format, + static_cast(multisamples), + load_op, + vk::AttachmentStoreOp::eStore, + vk::AttachmentLoadOp::eDontCare, + vk::AttachmentStoreOp::eDontCare, + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::eColorAttachmentOptimal + }; + + subpass.setColorAttachmentCount(1); + subpass.setPColorAttachments(&references[index++]); + } + + if (depth_format != vk::Format::eUndefined) { + references[index] = vk::AttachmentReference{index, vk::ImageLayout::eDepthStencilAttachmentOptimal}; + attachments[index] = + { + {}, + depth_format, + static_cast(multisamples), + load_op, + vk::AttachmentStoreOp::eStore, + vk::AttachmentLoadOp::eDontCare, + vk::AttachmentStoreOp::eDontCare, + vk::ImageLayout::eDepthStencilAttachmentOptimal, + vk::ImageLayout::eDepthStencilAttachmentOptimal + }; + + subpass.setPDepthStencilAttachment(&references[index++]); + } + + std::array subpasses = { subpass }; + vk::RenderPassCreateInfo renderpass_info({}, attachments, subpasses); + + auto renderpass = g_vk_instace->GetDevice().createRenderPassUnique(renderpass_info); + vk::RenderPass handle = renderpass.get(); + + render_pass_cache.emplace(key, std::move(renderpass)); + return handle; +} +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.h b/src/video_core/renderer_vulkan/vk_resource_cache.h new file mode 100644 index 000000000..5cf8e08fc --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_cache.h @@ -0,0 +1,58 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "video_core/renderer_vulkan/vk_texture.h" + +namespace Vulkan { + +using RenderPassCacheKey = std::tuple; + +constexpr u32 MAX_TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr u32 DESCRIPTOR_SET_LAYOUT_COUNT = 3; + +class VKResourceCache +{ +public: + VKResourceCache() = default; + ~VKResourceCache(); + + // Perform at startup, create descriptor layouts, compiles all static shaders. + bool Initialize(); + void Shutdown(); + + // Public interface. 
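+    // Samplers and render passes are memoized: repeated calls with the same
+    // description return the cached handle instead of creating a new object.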
+ VKBuffer& GetTextureUploadBuffer() { return texture_upload_buffer; } + vk::Sampler GetSampler(const SamplerInfo& info); + vk::RenderPass GetRenderPass(vk::Format color_format, vk::Format depth_format, u32 multisamples, vk::AttachmentLoadOp load_op); + vk::PipelineCache GetPipelineCache() const { return pipeline_cache.get(); } + +private: + // Dummy image for samplers that are unbound + VKTexture dummy_texture; + VKBuffer texture_upload_buffer; + + // Descriptor sets + std::array descriptor_layouts; + vk::UniquePipelineLayout pipeline_layout; + + // Render pass cache + std::unordered_map render_pass_cache; + std::unordered_map sampler_cache; + + vk::UniquePipelineCache pipeline_cache; + std::string pipeline_cache_filename; +}; + +extern std::unique_ptr g_object_cache; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp deleted file mode 100644 index f185b2355..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_vars.h" - -MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); - -namespace OpenGL { - -void OGLRenderbuffer::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenRenderbuffers(1, &handle); -} - -void OGLRenderbuffer::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteRenderbuffers(1, &handle); - OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply(); - handle = 0; -} - -void OGLTexture::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenTextures(1, &handle); -} - -void OGLTexture::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteTextures(1, &handle); - OpenGLState::GetCurState().ResetTexture(handle).Apply(); - handle = 0; -} - -void OGLTexture::Allocate(GLenum target, GLsizei levels, GLenum internalformat, GLenum format, - GLenum type, GLsizei width, GLsizei height, GLsizei depth) { - const bool tex_storage = GLAD_GL_ARB_texture_storage || GLES; - - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE: - if (tex_storage) { - glTexStorage1D(target, levels, internalformat, width); - } else { - for (GLsizei level{0}; level < levels; ++level) { - glTexImage1D(target, level, internalformat, width, 0, format, type, nullptr); - width >>= 1; - } - } - break; - case GL_TEXTURE_2D: - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_CUBE_MAP: - if (tex_storage) { - glTexStorage2D(target, levels, internalformat, width, height); - } else { - for (GLsizei level{0}; level < levels; ++level) { - glTexImage2D(target, level, internalformat, width, height, 0, format, type, - nullptr); - width >>= 1; - if (target != GL_TEXTURE_1D_ARRAY) - height >>= 1; - } - } - break; - case GL_TEXTURE_3D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - if 
(tex_storage) { - glTexStorage3D(target, levels, internalformat, width, height, depth); - } else { - for (GLsizei level{0}; level < levels; ++level) { - glTexImage3D(target, level, internalformat, width, height, depth, 0, format, type, - nullptr); - } - width >>= 1; - height >>= 1; - if (target == GL_TEXTURE_3D) - depth >>= 1; - } - break; - } - - if (!tex_storage) { - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels - 1); - } -} - -void OGLSampler::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenSamplers(1, &handle); -} - -void OGLSampler::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteSamplers(1, &handle); - OpenGLState::GetCurState().ResetSampler(handle).Apply(); - handle = 0; -} - -void OGLShader::Create(const char* source, GLenum type) { - if (handle != 0) - return; - if (source == nullptr) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = LoadShader(source, type); -} - -void OGLShader::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteShader(handle); - handle = 0; -} - -void OGLProgram::Create(bool separable_program, const std::vector& shaders) { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = LoadProgram(separable_program, shaders); -} - -void OGLProgram::Create(const char* vert_shader, const char* frag_shader) { - OGLShader vert, frag; - vert.Create(vert_shader, GL_VERTEX_SHADER); - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(false, {vert.handle, frag.handle}); -} - -void OGLProgram::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteProgram(handle); - OpenGLState::GetCurState().ResetProgram(handle).Apply(); - handle = 0; -} - -void OGLPipeline::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenProgramPipelines(1, &handle); -} - -void OGLPipeline::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteProgramPipelines(1, &handle); - OpenGLState::GetCurState().ResetPipeline(handle).Apply(); - handle = 0; -} - -void OGLBuffer::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenBuffers(1, &handle); -} - -void OGLBuffer::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteBuffers(1, &handle); - OpenGLState::GetCurState().ResetBuffer(handle).Apply(); - handle = 0; -} - -void OGLVertexArray::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenVertexArrays(1, &handle); -} - -void OGLVertexArray::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteVertexArrays(1, &handle); - OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); - handle = 0; -} - -void OGLFramebuffer::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenFramebuffers(1, &handle); -} - -void OGLFramebuffer::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteFramebuffers(1, &handle); - OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); - handle = 0; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h deleted file mode 100644 index 
4cbadf46c..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include "common/common_types.h" - -namespace Vulkan { - -class VKContext; - -struct VertexInfo -{ - VertexInfo() = default; - VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) : - position(position), color(color), texcoords(coords) {}; - - glm::vec3 position; - glm::vec3 color; - glm::vec2 texcoords; -}; - -struct Vertex : public VertexInfo -{ - Vertex() = default; - Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {}; - static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo)); - static constexpr std::array attribute_desc = - { - vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)), - vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)), - }; -}; - -class VKBuffer : public NonCopyable, public Resource -{ - friend class VertexBuffer; -public: - VKBuffer(std::shared_ptr context); - ~VKBuffer(); - - void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage); - void Bind(vk::CommandBuffer& command_buffer); - - static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr context); - static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region); - -public: - void* memory = nullptr; - vk::UniqueBuffer buffer; - vk::UniqueDeviceMemory buffer_memory; - vk::UniqueBufferView buffer_view; - uint32_t size = 0; - -protected: - std::shared_ptr context; -}; - -class VKTexture : public NonCopyable, public Resource -{ - friend class VkContext; -public: - VKTexture(const std::shared_ptr& context); - ~VKTexture() = default; - - void Create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint); - void CopyPixels(uint8_t* pixels, uint32_t count); - -private: - void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout); - -private: - // Texture buffer - void* pixels = nullptr; - std::shared_ptr context; - uint32_t width = 0, height = 0, channels = 0; - VKBuffer staging; - - // Texture objects - vk::UniqueImage texture; - vk::UniqueImageView texture_view; - vk::UniqueDeviceMemory texture_memory; - vk::UniqueSampler texture_sampler; - vk::Format format; -}; - -class OGLShader : private NonCopyable { -public: - OGLShader() = default; - - OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLShader() { - Release(); - } - - OGLShader& operator=(OGLShader&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - void Create(const char* source, GLenum type); - - void Release(); - - GLuint handle = 0; -}; - -class OGLProgram : private NonCopyable { -public: - OGLProgram() = default; - - OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLProgram() { - Release(); - } - - OGLProgram& operator=(OGLProgram&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new program from given shader objects - void 
Create(bool separable_program, const std::vector& shaders); - - /// Creates a new program from given shader soruce code - void Create(const char* vert_shader, const char* frag_shader); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - -class OGLPipeline : private NonCopyable { -public: - OGLPipeline() = default; - OGLPipeline(OGLPipeline&& o) noexcept { - handle = std::exchange(o.handle, 0); - } - ~OGLPipeline() { - Release(); - } - OGLPipeline& operator=(OGLPipeline&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - -class OGLBuffer : private NonCopyable { -public: - OGLBuffer() = default; - - OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLBuffer() { - Release(); - } - - OGLBuffer& operator=(OGLBuffer&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - -class OGLVertexArray : private NonCopyable { -public: - OGLVertexArray() = default; - - OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLVertexArray() { - Release(); - } - - OGLVertexArray& operator=(OGLVertexArray&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - -class OGLFramebuffer : private NonCopyable { -public: - OGLFramebuffer() = default; - - OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLFramebuffer() { - Release(); - } - - OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_shader_state.h b/src/video_core/renderer_vulkan/vk_shader_state.h new file mode 100644 index 000000000..f79446adf --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_shader_state.h @@ -0,0 +1,235 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
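
The new vk_shader_state.h below replaces the resource-manager wrappers deleted above with plain-old-data descriptions of Pica state that act as hash keys for cached shaders and pipelines. As a rough self-contained sketch of that pattern (a simplified stand-in for Common::HashableStruct, whose exact implementation is assumed here; FNV-1a over the raw state bytes):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <unordered_map>

    // A trivially-copyable state block: every field that affects the
    // generated shader goes in here, and nothing else.
    struct ShaderConfigState {
        uint32_t alpha_test_func = 0;
        uint32_t texture0_type = 0;
        // ...
    };

    struct ShaderConfig {
        ShaderConfigState state{};

        std::size_t Hash() const {
            // FNV-1a over the byte representation; padding bytes must be
            // zeroed for equal configs to hash equally.
            const auto* bytes = reinterpret_cast<const unsigned char*>(&state);
            uint64_t hash = 14695981039346656037ull;
            for (std::size_t i = 0; i < sizeof(state); ++i) {
                hash = (hash ^ bytes[i]) * 1099511628211ull;
            }
            return static_cast<std::size_t>(hash);
        }
        bool operator==(const ShaderConfig& o) const {
            return std::memcmp(&state, &o.state, sizeof(state)) == 0;
        }
    };

    struct ShaderConfigHasher {
        std::size_t operator()(const ShaderConfig& k) const noexcept { return k.Hash(); }
    };

    // Hypothetical cache: config -> handle of a previously compiled module.
    using ShaderCache = std::unordered_map<ShaderConfig, uint64_t, ShaderConfigHasher>;

This is the reason the configs below expose a Hash() that the std::hash specializations at the end of the header forward to.
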
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "common/hash.h" +#include "video_core/regs.h" +#include "video_core/shader/shader.h" + +namespace Vulkan { + +enum class ProgramType : u32 { VS, GS, FS }; + +enum Attributes { + ATTRIBUTE_POSITION, + ATTRIBUTE_COLOR, + ATTRIBUTE_TEXCOORD0, + ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, +}; + +// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() +struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { + Pica::TexturingRegs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } +}; + +struct PicaFSConfigState { + Pica::FramebufferRegs::CompareFunc alpha_test_func; + Pica::RasterizerRegs::ScissorMode scissor_test_mode; + Pica::TexturingRegs::TextureConfig::TextureType texture0_type; + bool texture2_use_coord1; + std::array tev_stages; + u8 combiner_buffer_input; + + Pica::RasterizerRegs::DepthBuffering depthmap_enable; + Pica::TexturingRegs::FogMode fog_mode; + bool fog_flip; + bool alphablend_enable; + Pica::FramebufferRegs::LogicOp logic_op; + + struct { + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + bool spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; + bool shadow_enable; + } light[8]; + + bool enable; + unsigned src_num; + Pica::LightingRegs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::LightingRegs::LightingConfig config; + bool enable_primary_alpha; + bool enable_secondary_alpha; + + bool enable_shadow; + bool shadow_primary; + bool shadow_secondary; + bool shadow_invert; + bool shadow_alpha; + unsigned shadow_selector; + + struct { + bool enable; + bool abs_input; + Pica::LightingRegs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + struct { + bool enable; + u32 coord; + Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; + Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; + bool separate_alpha; + bool noise_enable; + Pica::TexturingRegs::ProcTexShift u_shift, v_shift; + u32 lut_width; + u32 lut_offset0; + u32 lut_offset1; + u32 lut_offset2; + u32 lut_offset3; + u32 lod_min; + u32 lod_max; + Pica::TexturingRegs::ProcTexFilter lut_filter; + } proctex; + + bool shadow_rendering; + bool shadow_texture_orthographic; +}; + +/** + * This struct contains all state used to generate the GLSL fragment shader that emulates the + * current Pica register configuration. This struct is used as a cache key for generated GLSL shader + * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by + * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where + * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) + * two separate shaders sharing the same key. + */ +struct PicaFSConfig : Common::HashableStruct { + + /// Construct a PicaFSConfig with the given Pica register configuration. 
+ static PicaFSConfig BuildFromRegs(const Pica::Regs& regs); + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); + } +}; + +/** + * This struct contains common information to identify a GL vertex/geometry shader generated from + * PICA vertex/geometry shader. + */ +struct PicaShaderConfigCommon { + void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); + + u64 program_hash; + u64 swizzle_hash; + u32 main_offset; + bool sanitize_mul; + + u32 num_outputs; + + // output_map[output register index] -> output attribute index + std::array output_map; +}; + +/** + * This struct contains information to identify a GL vertex shader generated from PICA vertex + * shader. + */ +struct PicaVSConfig : Common::HashableStruct { + explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + state.Init(regs, setup); + } + explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { + state = conf; + } +}; + +struct PicaGSConfigCommonRaw { + void Init(const Pica::Regs& regs); + + u32 vs_output_attributes; + u32 gs_output_attributes; + + struct SemanticMap { + u32 attribute_index; + u32 component_index; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps; +}; + +/** + * This struct contains information to identify a GL geometry shader generated from PICA no-geometry + * shader pipeline + */ +struct PicaFixedGSConfig : Common::HashableStruct { + explicit PicaFixedGSConfig(const Pica::Regs& regs) { + state.Init(regs); + } +}; + +/** + * This struct combines the vertex and fragment states for a complete pipeline cache key + */ +struct VKPipelineCacheKey { + VKPipelineCacheKey(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) : + vertex_config(regs.vs, setup), fragment_config(PicaFSConfig::BuildFromRegs(regs)) {} + + PicaVSConfig vertex_config; + PicaFSConfig fragment_config; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept { + return k.Hash(); + } +}; + +template <> +struct hash { + std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h index 5c8d85b66..0a9caab34 100644 --- a/src/video_core/renderer_vulkan/vk_state.h +++ b/src/video_core/renderer_vulkan/vk_state.h @@ -41,6 +41,15 @@ constexpr uint ShadowTextureNZ = 6; class VulkanState { public: + struct Messenger { + bool cull_state; + bool depth_state; + bool color_mask; + bool stencil_state; + bool logic_op; + bool texture_state; + }; + struct { bool enabled; vk::CullModeFlags mode; @@ -130,18 +139,8 @@ public: return cur_state; } - /// Apply this state as the current OpenGL state - void Apply() const; - - /// Resets any references to the given resource - VulkanState& ResetTexture(GLuint handle); - VulkanState& ResetSampler(GLuint handle); - VulkanState& ResetProgram(GLuint handle); - VulkanState& ResetPipeline(GLuint handle); - VulkanState& ResetBuffer(GLuint handle); - VulkanState& 
ResetVertexArray(GLuint handle); - VulkanState& ResetFramebuffer(GLuint handle); - VulkanState& ResetRenderbuffer(GLuint handle); + /// Apply all dynamic state to the provided Vulkan command buffer + void Apply(vk::CommandBuffer& command_buffer) const; private: static VulkanState cur_state; diff --git a/src/video_core/renderer_vulkan/vk_surface_params.cpp b/src/video_core/renderer_vulkan/vk_surface_params.cpp new file mode 100644 index 000000000..a2c297c9a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_surface_params.cpp @@ -0,0 +1,171 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "video_core/renderer_vulkan/vk_rasterizer_cache.h" +#include "video_core/renderer_vulkan/vk_surface_params.h" + +namespace Vulkan { + +SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { + SurfaceParams params = *this; + const u32 tiled_size = is_tiled ? 8 : 1; + const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size); + PAddr aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); + PAddr aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); + + if (aligned_end - aligned_start > stride_tiled_bytes) { + params.addr = aligned_start; + params.height = (aligned_end - aligned_start) / BytesInPixels(stride); + } else { + // 1 row + ASSERT(aligned_end - aligned_start == stride_tiled_bytes); + const u32 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); + aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); + aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); + params.addr = aligned_start; + params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size; + params.stride = params.width; + params.height = tiled_size; + } + params.UpdateParams(); + + return params; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle unscaled_rect) const { + if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { + return {}; + } + + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; + unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; + unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; + } + + const u32 stride_tiled = !is_tiled ? stride : stride * 8; + + const u32 pixel_offset = + stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + + unscaled_rect.left; + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); + + return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; +} + +SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { + SurfaceInterval result{}; + const auto valid_regions = + SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; + for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1)), + addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1))}; + + if (BytesInPixels(is_tiled ? 
8 * 8 : 1) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); + SurfaceInterval rect_interval{ + addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), + addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), + }; + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + +Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + + if (is_tiled) { + const int x0 = (begin_pixel_index % (stride * 8)) / 8; + const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom + return Common::Rectangle(x0, height - y0, x0 + sub_surface.width, + height - (y0 + sub_surface.height)); + } + + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + // Bottom to top + return Common::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); +} + +Common::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return std::tie(other_surface.addr, other_surface.width, other_surface.height, + other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == + std::tie(addr, width, height, stride, pixel_format, is_tiled) && + pixel_format != PixelFormat::Invalid; +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + return sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && + GetSubRect(sub_surface).right <= stride; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && + addr <= expanded_surface.end && expanded_surface.addr <= end && + is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % + BytesInPixels(stride * (is_tiled ? 
8 : 1)) ==
+               0;
+}
+
+bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
+    if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||
+        end < texcopy_params.end) {
+        return false;
+    }
+    if (texcopy_params.width != texcopy_params.stride) {
+        const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1));
+        return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
+               texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
+               (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
+               ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
+    }
+    return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_surface_params.h b/src/video_core/renderer_vulkan/vk_surface_params.h
new file mode 100644
index 000000000..b041cce57
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_surface_params.h
@@ -0,0 +1,270 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include
+#include
+#include
+#include "common/assert.h"
+#include "common/math_util.h"
+#include "core/hw/gpu.h"
+#include "video_core/regs_framebuffer.h"
+#include "video_core/regs_texturing.h"
+
+namespace Vulkan {
+
+struct CachedSurface;
+using Surface = std::shared_ptr;
+
+using SurfaceInterval = boost::icl::right_open_interval;
+
+struct SurfaceParams {
+private:
+    static constexpr std::array BPP_TABLE = {
+        32, // RGBA8
+        24, // RGB8
+        16, // RGB5A1
+        16, // RGB565
+        16, // RGBA4
+        16, // IA8
+        16, // RG8
+        8,  // I8
+        8,  // A8
+        8,  // IA4
+        4,  // I4
+        4,  // A4
+        4,  // ETC1
+        8,  // ETC1A4
+        16, // D16
+        0,
+        24, // D24
+        32, // D24S8
+    };
+
+public:
+    enum class PixelFormat {
+        // First 5 formats are shared between textures and color buffers
+        RGBA8 = 0,
+        RGB8 = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4 = 4,
+
+        // Texture-only formats
+        IA8 = 5,
+        RG8 = 6,
+        I8 = 7,
+        A8 = 8,
+        IA4 = 9,
+        I4 = 10,
+        A4 = 11,
+        ETC1 = 12,
+        ETC1A4 = 13,
+
+        // Depth buffer-only formats
+        D16 = 14,
+        // gap
+        D24 = 16,
+        D24S8 = 17,
+
+        Invalid = 255,
+    };
+
+    enum class SurfaceType {
+        Color = 0,
+        Texture = 1,
+        Depth = 2,
+        DepthStencil = 3,
+        Fill = 4,
+        Invalid = 5
+    };
+
+    static constexpr unsigned int GetFormatBpp(PixelFormat format) {
+        const auto format_idx = static_cast(format);
+        DEBUG_ASSERT_MSG(format_idx < BPP_TABLE.size(), "Invalid pixel format {}", format_idx);
+        return BPP_TABLE[format_idx];
+    }
+
+    unsigned int GetFormatBpp() const {
+        return GetFormatBpp(pixel_format);
+    }
+
+    static std::string_view PixelFormatAsString(PixelFormat format) {
+        switch (format) {
+        case PixelFormat::RGBA8:
+            return "RGBA8";
+        case PixelFormat::RGB8:
+            return "RGB8";
+        case PixelFormat::RGB5A1:
+            return "RGB5A1";
+        case PixelFormat::RGB565:
+            return "RGB565";
+        case PixelFormat::RGBA4:
+            return "RGBA4";
+        case PixelFormat::IA8:
+            return "IA8";
+        case PixelFormat::RG8:
+            return "RG8";
+        case PixelFormat::I8:
+            return "I8";
+        case PixelFormat::A8:
+            return "A8";
+        case PixelFormat::IA4:
+            return "IA4";
+        case PixelFormat::I4:
+            return "I4";
+        case PixelFormat::A4:
+            return "A4";
+        case PixelFormat::ETC1:
+            return "ETC1";
+        case PixelFormat::ETC1A4:
+            return "ETC1A4";
+        case PixelFormat::D16:
+            return "D16";
+        case PixelFormat::D24:
+            return "D24";
+        case PixelFormat::D24S8:
+            return "D24S8";
+        default:
+            return "Not a real pixel format";
+        }
+    }
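
As a quick sanity check on GetFormatBpp() and the BytesInPixels()/PixelsInBytes() helpers declared further down, a standalone worked example for RGB5A1 (16 bpp per the BPP_TABLE above; the constants are illustrative):

    #include <climits>
    #include <cstdint>

    constexpr uint32_t bpp = 16; // RGB5A1
    constexpr uint32_t BytesInPixels(uint32_t pixels) { return pixels * bpp / CHAR_BIT; }
    constexpr uint32_t PixelsInBytes(uint32_t bytes) { return bytes * CHAR_BIT / bpp; }

    // A 128x128 RGB5A1 surface spans 128 * 128 * 2 = 32768 bytes.
    static_assert(BytesInPixels(128 * 128) == 32768);
    static_assert(PixelsInBytes(32768) == 128 * 128);

Note that for the 4 bpp formats (I4, A4, ETC1) two pixels share one byte, so the division above is only exact for even pixel counts.
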
+
+    static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
+        return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
+    }
+
+    static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
+        return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
+    }
+
+    static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
+        return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
+                                          : PixelFormat::Invalid;
+    }
+
+    static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
+        switch (format) {
+        // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
+        case GPU::Regs::PixelFormat::RGB565:
+            return PixelFormat::RGB565;
+        case GPU::Regs::PixelFormat::RGB5A1:
+            return PixelFormat::RGB5A1;
+        default:
+            return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
+        }
+    }
+
+    static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
+        SurfaceType a_type = GetFormatType(pixel_format_a);
+        SurfaceType b_type = GetFormatType(pixel_format_b);
+
+        if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
+            (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
+            return true;
+        }
+
+        if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
+            return true;
+        }
+
+        return false;
+    }
+
+    static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
+        if ((unsigned int)pixel_format < 5) {
+            return SurfaceType::Color;
+        }
+
+        if ((unsigned int)pixel_format < 14) {
+            return SurfaceType::Texture;
+        }
+
+        if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
+            return SurfaceType::Depth;
+        }
+
+        if (pixel_format == PixelFormat::D24S8) {
+            return SurfaceType::DepthStencil;
+        }
+
+        return SurfaceType::Invalid;
+    }
+
+    /// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
+    /// and "pixel_format"
+    void UpdateParams() {
+        if (stride == 0) {
+            stride = width;
+        }
+        type = GetFormatType(pixel_format);
+        size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
+                         : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
+        end = addr + size;
+    }
+
+    SurfaceInterval GetInterval() const {
+        return SurfaceInterval(addr, end);
+    }
+
+    // Returns the outer rectangle containing "interval"
+    SurfaceParams FromInterval(SurfaceInterval interval) const;
+
+    SurfaceInterval GetSubRectInterval(Common::Rectangle unscaled_rect) const;
+
+    // Returns the region of the largest valid rectangle within the interval
+    SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
+
+    u32 GetScaledWidth() const {
+        return width * res_scale;
+    }
+
+    u32 GetScaledHeight() const {
+        return height * res_scale;
+    }
+
+    Common::Rectangle GetRect() const {
+        return {0, height, width, 0};
+    }
+
+    Common::Rectangle GetScaledRect() const {
+        return {0, GetScaledHeight(), GetScaledWidth(), 0};
+    }
+
+    u32 PixelsInBytes(u32 size) const {
+        return size * CHAR_BIT / GetFormatBpp(pixel_format);
+    }
+
+    u32 BytesInPixels(u32 pixels) const {
+        return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
+    }
+
+    bool ExactMatch(const SurfaceParams& other_surface) const;
+    bool CanSubRect(const SurfaceParams& sub_surface) const;
+    bool CanExpand(const SurfaceParams& expanded_surface) const;
+    bool CanTexCopy(const SurfaceParams& texcopy_params) const;
+
+    Common::Rectangle GetSubRect(const SurfaceParams& sub_surface) const;
+    Common::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const;
+
+    PAddr addr = 0;
+    PAddr end = 0;
+    u32 size = 0;
+
+    u32 width = 0;
+    u32 height = 0;
+    u32 stride = 0;
+    u16 res_scale = 1;
+
+    bool is_tiled = false;
+    PixelFormat pixel_format = PixelFormat::Invalid;
+    SurfaceType type = SurfaceType::Invalid;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index ffaa00455..c2d3fbc48 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -72,7 +72,6 @@ private:
 public:
     // Window attributes
-    GLFWwindow* window;
     uint32_t width = 0, height = 0;
     bool framebuffer_resized = false;
     std::string_view name;
diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp
index 9399bc8cf..f9512ab8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture.cpp
@@ -1,16 +1,20 @@
-#include "vk_texture.h"
-#include "vk_buffer.h"
-#include "vk_context.h"
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
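
To make the size computation in UpdateParams() above concrete, a standalone worked example (RGBA8 assumed, so one pixel is four bytes):

    #include <cstdint>

    // RGBA8: 32 bpp, so BytesInPixels(pixels) == pixels * 4.
    constexpr uint32_t BytesInPixels(uint32_t pixels) { return pixels * 4; }

    constexpr uint32_t width = 64, height = 64, stride = 64;

    // Linear: the last row starts at stride * (height - 1) and is width pixels long.
    constexpr uint32_t linear_size = BytesInPixels(stride * (height - 1) + width);
    // Tiled: rows come in 8x8 blocks, so the unit becomes 8 rows of stride.
    constexpr uint32_t tiled_size = BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);

    // With stride == width both branches collapse to the full surface area.
    static_assert(linear_size == 64 * 64 * 4);
    static_assert(tiled_size == 64 * 64 * 4);

The two branches only diverge for surfaces whose rows are padded out to a stride larger than their width.
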
-VkTexture::VkTexture(const std::shared_ptr& context) :
-    context(context), staging(context)
-{
-}
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/vk_texture.h"
+#include "video_core/renderer_vulkan/vk_instance.h"

-void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format format_)
+namespace Vulkan {
+
+void VKTexture::Create(const Info& info)
 {
-    auto& device = context->device;
-    format = format_; width = width_; height = height_;
+    auto& device = g_vk_instace->GetDevice();
+    format = info.format;
+    width = info.width;
+    height = info.height;

     switch (format)
     {
@@ -23,57 +27,73 @@ void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format f
         channels = 3;
         break;
     default:
-        throw std::runtime_error("[VK] Unknown texture format");
+        LOG_CRITICAL(Render_Vulkan, "Unknown texture format {}", vk::to_string(format));
+        UNREACHABLE();
     }

-    int image_size = width * height * channels;
-    staging.create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+    // Create staging memory buffer for pixel transfers
+    u32 image_size = width * height * channels;
+    staging.Create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
                    vk::BufferUsageFlagBits::eTransferSrc);
     pixels = staging.memory;

     // Create the texture
+    vk::ImageCreateFlags flags = info.view_type == vk::ImageViewType::eCube
+                                     ? vk::ImageCreateFlags(vk::ImageCreateFlagBits::eCubeCompatible)
+                                     : vk::ImageCreateFlags{};
     vk::ImageCreateInfo image_info
     (
-        {},
-        type,
+        flags,
+        info.type,
         format,
-        { width, height, 1 }, 1, 1,
+        { width, height, 1 }, info.mipmap_levels, info.array_layers,
         vk::SampleCountFlagBits::e1,
         vk::ImageTiling::eOptimal,
         vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled
     );
-    texture = device->createImageUnique(image_info);
+    texture = device.createImageUnique(image_info);

     // Create texture memory
-    auto requirements = device->getImageMemoryRequirements(texture.get());
-    auto memory_index = Buffer::find_memory_type(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal, context);
+    auto requirements = device.getImageMemoryRequirements(texture.get());
+    auto memory_index = VKBuffer::FindMemoryType(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal);
     vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
-    texture_memory = device->allocateMemoryUnique(alloc_info);
-    device->bindImageMemory(texture.get(), texture_memory.get(), 0);
+    texture_memory = device.allocateMemoryUnique(alloc_info);
+    device.bindImageMemory(texture.get(), texture_memory.get(), 0);

     // Create texture view
-    vk::ImageViewCreateInfo view_info({}, texture.get(), vk::ImageViewType::e1D, format, {},
-        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
-    texture_view = device->createImageViewUnique(view_info);
+    vk::ImageViewCreateInfo view_info({}, texture.get(), info.view_type, format, {},
+        vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
+    texture_view = device.createImageViewUnique(view_info);

     // Create texture sampler
-    auto props = context->physical_device.getProperties();
-    vk::SamplerCreateInfo sampler_info({}, vk::Filter::eNearest, vk::Filter::eNearest, vk::SamplerMipmapMode::eNearest, vk::SamplerAddressMode::eClampToEdge,
-        vk::SamplerAddressMode::eClampToEdge, vk::SamplerAddressMode::eClampToEdge, {}, true, props.limits.maxSamplerAnisotropy,
-        false, vk::CompareOp::eAlways, {}, {}, vk::BorderColor::eIntOpaqueBlack, false);
+    auto properties = g_vk_instace->GetPhysicalDevice().getProperties();
+    vk::SamplerCreateInfo sampler_info
+    (
+        {},
+        info.sampler_info.mag_filter,
+        info.sampler_info.min_filter,
+        info.sampler_info.mipmap_mode,
+        info.sampler_info.wrapping[0], info.sampler_info.wrapping[1], info.sampler_info.wrapping[2],
+        {},
+        true,
+        properties.limits.maxSamplerAnisotropy,
+        false,
+        vk::CompareOp::eAlways,
+        {},
+        {},
+        vk::BorderColor::eIntOpaqueBlack,
+        false
+    );

-    texture_sampler = device->createSamplerUnique(sampler_info);
+    texture_sampler = device.createSamplerUnique(sampler_info);
 }

-void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
+void VKTexture::TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
 {
-    auto& device = context->device;
-    auto& queue = context->graphics_queue;
+    auto& device = g_vk_instace->GetDevice();
+    auto& queue = g_vk_instace->graphics_queue;

-    vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
-    vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
+    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
+    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];

     command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
@@ -82,25 +102,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
     std::array barriers = { barrier };
     vk::PipelineStageFlags source_stage, destination_stage;

-    if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal)
-    {
+    if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal) {
         barrier.srcAccessMask = vk::AccessFlagBits::eNone;
         barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite;

         source_stage = vk::PipelineStageFlagBits::eTopOfPipe;
         destination_stage = vk::PipelineStageFlagBits::eTransfer;
     }
-    else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal)
-    {
+    else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal) {
         barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
         barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;

         source_stage = vk::PipelineStageFlagBits::eTransfer;
         destination_stage = vk::PipelineStageFlagBits::eFragmentShader;
     }
-    else
-    {
-        throw std::invalid_argument("[VK] Unsupported layout transition!");
+    else {
+        LOG_CRITICAL(Render_Vulkan, "Unsupported layout transition");
+        UNREACHABLE();
     }

     command_buffer.pipelineBarrier(source_stage, destination_stage, vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
@@ -110,23 +128,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
     queue.submit(submit_info, nullptr);
     queue.waitIdle();

-    device->freeCommandBuffers(context->command_pool.get(), command_buffer);
+    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
 }

-void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
+void VKTexture::CopyPixels(std::span new_pixels)
 {
-    auto& device = context->device;
-    auto& queue = context->graphics_queue;
+    auto& device = g_vk_instace->GetDevice();
+    auto& queue = g_vk_instace->graphics_queue;

     // Transition image to transfer format
-    transition_layout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);
+    TransitionLayout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);

     // Copy pixels to staging buffer
-    std::memcpy(pixels, new_pixels, count * channels);
+    std::memcpy(pixels, new_pixels.data(), new_pixels.size() * channels);

     // Copy the staging buffer to the image
-    vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
-    vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
+    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
+    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];

     command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
@@ -140,8 +158,10 @@ void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
     queue.submit(submit_info, nullptr);
     queue.waitIdle();

-    device->freeCommandBuffers(context->command_pool.get(), command_buffer);
+    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);

     // Prepare for shader reads
-    transition_layout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
+    TransitionLayout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
+}
+
 }
diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h
index cabfe27f8..c91c31966 100644
--- a/src/video_core/renderer_vulkan/vk_texture.h
+++ b/src/video_core/renderer_vulkan/vk_texture.h
@@ -1,29 +1,55 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
 #pragma once
-#include "vk_buffer.h"
+
 #include
 #include
+#include "video_core/renderer_vulkan/vk_buffer.h"

-class VkContext;
+namespace Vulkan {

-class VkTexture : public NonCopyable, public Resource
-{
-    friend class VkContext;
+struct SamplerInfo {
+    std::array wrapping = { vk::SamplerAddressMode::eClampToEdge };
+    vk::Filter min_filter = vk::Filter::eLinear;
+    vk::Filter mag_filter = vk::Filter::eLinear;
+    vk::SamplerMipmapMode mipmap_mode = vk::SamplerMipmapMode::eLinear;
+};
+
+/// Vulkan texture object
+class VKTexture final : public NonCopyable {
 public:
-    VkTexture(const std::shared_ptr& context);
-    ~VkTexture() = default;
+    /// Information for the creation of the target texture
+    struct Info {
+        u32 width, height;
+        vk::Format format;
+        vk::ImageType type;
+        vk::ImageViewType view_type;
+        u32 mipmap_levels = 1;
+        u32 array_layers = 1;
+        SamplerInfo sampler_info = {};
+    };

-    void create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint);
-    void copy_pixels(std::span pixels);
+    VKTexture() = default;
+    VKTexture(VKTexture&&) = default;
+    ~VKTexture() = default;
+
+    /// Create a new Vulkan texture object along with its sampler
+    void Create(const Info& info);
+
+    /// Copies CPU side pixel data to the GPU texture buffer
+    void CopyPixels(std::span pixels);

 private:
-    void transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
+    /// Used to transition the image to an optimal layout during transfers
+    void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);

 private:
     // Texture buffer
     void* pixels = nullptr;
-    std::shared_ptr context;
     uint32_t width = 0, height = 0, channels = 0;
-    Buffer staging;
+    VKBuffer staging;

     // Texture objects
     vk::UniqueImage texture;
@@ -32,3 +58,24 @@ private:
     vk::UniqueSampler texture_sampler;
     vk::Format format;
 };
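
TransitionLayout() and CopyPixels() above both use the same allocate-record-submit-wait sequence for one-off transfer work. Factored out, the pattern looks roughly like this (hypothetical helpers, not part of this change):

    #include <vulkan/vulkan.hpp>

    vk::CommandBuffer BeginOneTimeCommands(vk::Device device, vk::CommandPool pool) {
        vk::CommandBufferAllocateInfo alloc_info(pool, vk::CommandBufferLevel::ePrimary, 1);
        vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
        command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
        return command_buffer;
    }

    void EndOneTimeCommands(vk::Device device, vk::CommandPool pool, vk::Queue queue,
                            vk::CommandBuffer command_buffer) {
        command_buffer.end();
        vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &command_buffer);
        queue.submit(submit_info, nullptr);
        queue.waitIdle(); // blocks until the copy/transition completes
        device.freeCommandBuffers(pool, command_buffer);
    }

waitIdle() keeps the code simple but serializes every upload against the whole queue; a per-submission vk::Fence would let transfers overlap other GPU work.
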
+
+/// Vulkan framebuffer object similar to an FBO in OpenGL
+class VKFramebuffer final : public NonCopyable {
+public:
+    VKFramebuffer() = default;
+    ~VKFramebuffer() = default;
+
+    /// Create Vulkan framebuffer object
+    void Create(u32 width, u32 height, u32 layers, u32 samples);
+
+    vk::Rect2D GetRect() const { return vk::Rect2D{{0, 0}, {width, height}}; }
+
+private:
+    u32 width = 0, height = 0;
+    vk::UniqueFramebuffer framebuffer;
+    vk::RenderPass load_renderpass;
+    vk::RenderPass discard_renderpass;
+    vk::RenderPass clear_renderpass;
+};
+
+} // namespace Vulkan
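
Finally, putting the new texture interface together, a typical upload would look roughly like this (illustrative only; assumes g_vk_instace has been initialized and that CopyPixels() takes a span of bytes):

    #include <cstdint>
    #include <vector>

    #include "video_core/renderer_vulkan/vk_texture.h"

    void UploadExample() {
        Vulkan::VKTexture::Info info{};
        info.width = 256;
        info.height = 256;
        info.format = vk::Format::eR8G8B8A8Uint; // assumed to take the 4-channel path in Create()
        info.type = vk::ImageType::e2D;
        info.view_type = vk::ImageViewType::e2D;

        Vulkan::VKTexture texture;
        texture.Create(info);

        // Solid white image; CopyPixels() stages the bytes and leaves the image
        // in eShaderReadOnlyOptimal once the copy is done.
        std::vector<uint8_t> rgba(256 * 256 * 4, 0xFF);
        texture.CopyPixels(rgba);
    }
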