renderer_vulkan: Implement basic utility classes

* These first few commits are mostly code dumps until the renderer interface
is finalized. This commit adds some basic Vulkan objects such as textures, buffers
and samplers, and translates some of the simpler OpenGL code to Vulkan.

* In addition, a simple resource cache is implemented that handles recycling
of samplers and renderpasses, and also stores the constant descriptor layout infos.
I've added all the resources I could deduce being used in shaders, though
I might be missing something. I'm still undecided on whether the resource
cache should also handle pipelines. (The recycling idea is sketched below.)
GPUCode
2022-04-28 18:14:13 +03:00
parent c89434627a
commit 84f97fc77e
26 changed files with 6925 additions and 760 deletions
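
Roughly, the sampler recycling mentioned above boils down to the sketch below. This is a minimal illustration only, not the actual vk_resource_cache interface from this commit; the key layout, hashing, and all names here are assumptions.

// Hedged sketch of sampler recycling: identical sampler states map to a
// single vk::Sampler owned by the cache. Illustrative only.
#include <cstddef>
#include <unordered_map>
#include <vulkan/vulkan.hpp>

struct SamplerKey {
    vk::Filter mag_filter;
    vk::Filter min_filter;
    vk::SamplerAddressMode wrap_u;
    vk::SamplerAddressMode wrap_v;

    bool operator==(const SamplerKey& other) const {
        return mag_filter == other.mag_filter && min_filter == other.min_filter &&
               wrap_u == other.wrap_u && wrap_v == other.wrap_v;
    }
};

struct SamplerKeyHash {
    std::size_t operator()(const SamplerKey& key) const {
        // Pack the four enums into one value; good enough for a sketch
        return (static_cast<std::size_t>(key.mag_filter) << 24) ^
               (static_cast<std::size_t>(key.min_filter) << 16) ^
               (static_cast<std::size_t>(key.wrap_u) << 8) ^
               static_cast<std::size_t>(key.wrap_v);
    }
};

class SamplerCache {
public:
    // Return the cached sampler for this state, creating it on first use
    vk::Sampler GetSampler(vk::Device device, const SamplerKey& key) {
        auto [it, is_new] = samplers.try_emplace(key);
        if (is_new) {
            const vk::SamplerCreateInfo info({}, key.mag_filter, key.min_filter,
                                             vk::SamplerMipmapMode::eLinear,
                                             key.wrap_u, key.wrap_v);
            it->second = device.createSamplerUnique(info);
        }
        return it->second.get();
    }

private:
    std::unordered_map<SamplerKey, vk::UniqueSampler, SamplerKeyHash> samplers;
};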

View File

@ -56,6 +56,9 @@ protected:
     constexpr NonCopyable() = default;
     ~NonCopyable() = default;
+    // Enable std::move operations
+    NonCopyable(NonCopyable&&) = default;
     NonCopyable(const NonCopyable&) = delete;
     NonCopyable& operator=(const NonCopyable&) = delete;
 };

View File

@ -73,12 +73,23 @@ add_library(video_core STATIC
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/vk_buffer.cpp
     renderer_vulkan/vk_buffer.h
-    renderer_vulkan/vk_context.cpp
-    renderer_vulkan/vk_context.h
-    renderer_vulkan/vk_resource_manager.cpp
-    renderer_vulkan/vk_resource_manager.h
+    renderer_vulkan/vk_instance.cpp
+    renderer_vulkan/vk_instance.h
+    renderer_vulkan/vk_resource_cache.cpp
+    renderer_vulkan/vk_resource_cache.h
+    renderer_vulkan/vk_rasterizer_cache.cpp
+    renderer_vulkan/vk_rasterizer_cache.h
+    renderer_vulkan/vk_rasterizer.cpp
+    renderer_vulkan/vk_rasterizer.h
+    renderer_vulkan/vk_pipeline.cpp
+    renderer_vulkan/vk_pipeline.h
+    renderer_vulkan/vk_pipeline_manager.h
+    renderer_vulkan/vk_pipeline_manager.cpp
+    renderer_vulkan/vk_shader_state.h
+    renderer_vulkan/vk_state.cpp
+    renderer_vulkan/vk_state.h
     renderer_vulkan/vk_surface_params.cpp
     renderer_vulkan/vk_surface_params.h
     renderer_vulkan/vk_swapchain.cpp
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture.cpp

View File

@ -1,64 +1,59 @@
-#include "vk_buffer.h"
-#include "vk_context.h"
-#include <cassert>
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/vk_buffer.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
 #include <algorithm>
 #include <type_traits>
 #include <cstring>
-Buffer::Buffer(std::shared_ptr<VkContext> context) :
-    context(context)
-{
-}
-Buffer::~Buffer()
-{
-    auto& device = context->device;
-    if (memory != nullptr)
-        device->unmapMemory(buffer_memory.get());
-}
+namespace Vulkan {
+VKBuffer::~VKBuffer()
+{
+    if (memory != nullptr) {
+        g_vk_instace->GetDevice().unmapMemory(buffer_memory.get());
+    }
+}
-void Buffer::create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage)
+void VKBuffer::Create(uint32_t byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, vk::Format view_format)
 {
-    auto& device = context->device;
+    auto& device = g_vk_instace->GetDevice();
     size = byte_count;
     vk::BufferCreateInfo bufferInfo({}, byte_count, usage);
-    buffer = device->createBufferUnique(bufferInfo);
-    auto mem_requirements = device->getBufferMemoryRequirements(buffer.get());
-    auto memory_type_index = find_memory_type(mem_requirements.memoryTypeBits, properties, context);
+    buffer = device.createBufferUnique(bufferInfo);
+    auto mem_requirements = device.getBufferMemoryRequirements(buffer.get());
+    auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, properties);
     vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index);
-    buffer_memory = device->allocateMemoryUnique(alloc_info);
-    device->bindBufferMemory(buffer.get(), buffer_memory.get(), 0);
+    buffer_memory = device.allocateMemoryUnique(alloc_info);
+    device.bindBufferMemory(buffer.get(), buffer_memory.get(), 0);
     // Optionally map the buffer to CPU memory
     if (properties & vk::MemoryPropertyFlagBits::eHostVisible)
-        memory = device->mapMemory(buffer_memory.get(), 0, byte_count);
+        memory = device.mapMemory(buffer_memory.get(), 0, byte_count);
     // Create buffer view for texel buffers
     if (usage & vk::BufferUsageFlagBits::eStorageTexelBuffer || usage & vk::BufferUsageFlagBits::eUniformTexelBuffer)
     {
-        vk::BufferViewCreateInfo view_info({}, buffer.get(), vk::Format::eR32Uint, 0, byte_count);
-        buffer_view = device->createBufferViewUnique(view_info);
+        vk::BufferViewCreateInfo view_info({}, buffer.get(), view_format, 0, byte_count);
+        buffer_view = device.createBufferViewUnique(view_info);
     }
 }
-void Buffer::bind(vk::CommandBuffer& command_buffer)
-{
-    vk::DeviceSize offsets[1] = { 0 };
-    command_buffer.bindVertexBuffers(0, 1, &buffer.get(), offsets);
-}
-void Buffer::copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region)
+void VKBuffer::CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region)
 {
-    auto& context = src_buffer.context;
-    auto& device = context->device;
-    auto& queue = context->graphics_queue;
-    vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
-    vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
+    auto& device = g_vk_instace->GetDevice();
+    auto& queue = g_vk_instace->graphics_queue;
+    vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
+    vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
     command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
     command_buffer.copyBuffer(src_buffer.buffer.get(), dst_buffer.buffer.get(), region);
@ -68,12 +63,12 @@ void Buffer::copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::Buffe
     queue.submit(submit_info, nullptr);
     queue.waitIdle();
-    device->freeCommandBuffers(context->command_pool.get(), command_buffer);
+    device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
 }
-uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr<VkContext> context)
+uint32_t VKBuffer::FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties)
 {
-    vk::PhysicalDeviceMemoryProperties mem_properties = context->physical_device.getMemoryProperties();
+    vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties();
     for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++)
     {
@ -82,32 +77,8 @@ uint32_t Buffer::find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags
         return i;
     }
-    throw std::runtime_error("[VK] Failed to find suitable memory type!");
+    LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type.");
+    UNREACHABLE();
 }
-VertexBuffer::VertexBuffer(const std::shared_ptr<VkContext>& context) :
-    host(context), local(context), context(context)
-{
-}
-void VertexBuffer::create(uint32_t vertex_count)
-{
-    // Create a host and local buffer
-    auto byte_count = sizeof(Vertex) * vertex_count;
-    local.create(byte_count, vk::MemoryPropertyFlagBits::eDeviceLocal,
-        vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eVertexBuffer);
-    host.create(byte_count, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
-        vk::BufferUsageFlagBits::eTransferSrc);
-}
-void VertexBuffer::copy_vertices(Vertex* vertices, uint32_t count)
-{
-    auto byte_count = count * sizeof(Vertex);
-    std::memcpy(host.memory, vertices, byte_count);
-    Buffer::copy_buffer(host, local, { 0, 0, byte_count });
-}
-void VertexBuffer::bind(vk::CommandBuffer& command_buffer)
-{
-    local.bind(command_buffer);
-}

View File

@ -1,48 +1,28 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
 #pragma once
-#include <vulkan/vulkan.hpp>
-#include <glm/glm.hpp>
 #include <memory>
 #include <vector>
+#include <vulkan/vulkan.hpp>
+#include "common/common_types.h"
-class VkContext;
+namespace Vulkan {
-struct VertexInfo
-{
-    VertexInfo() = default;
-    VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) :
-        position(position), color(color), texcoords(coords) {};
-    glm::vec3 position;
-    glm::vec3 color;
-    glm::vec2 texcoords;
-};
-struct Vertex : public VertexInfo
-{
-    Vertex() = default;
-    Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {};
-    static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
-    static constexpr std::array<vk::VertexInputAttributeDescription, 3> attribute_desc =
-    {
-        vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)),
-        vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)),
-        vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)),
-    };
-};
-class Buffer : public NonCopyable, public Resource
-{
-    friend class VertexBuffer;
+/// Generic Vulkan buffer object used by almost every resource
+class VKBuffer final : public NonCopyable {
 public:
-    Buffer(std::shared_ptr<VkContext> context);
-    ~Buffer();
+    VKBuffer() = default;
+    VKBuffer(VKBuffer&&) = default;
+    ~VKBuffer();
-    void create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage);
-    void bind(vk::CommandBuffer& command_buffer);
+    /// Create a generic Vulkan buffer object
+    void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, vk::Format view_format = vk::Format::eUndefined);
-    static uint32_t find_memory_type(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr<VkContext> context);
-    static void copy_buffer(Buffer& src_buffer, Buffer& dst_buffer, const vk::BufferCopy& region);
+    static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties);
+    static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region);
 public:
     void* memory = nullptr;
@ -50,22 +30,6 @@ public:
     vk::UniqueDeviceMemory buffer_memory;
     vk::UniqueBufferView buffer_view;
     uint32_t size = 0;
-protected:
-    std::shared_ptr<VkContext> context;
 };
-class VertexBuffer
-{
-public:
-    VertexBuffer(const std::shared_ptr<VkContext>& context);
-    ~VertexBuffer() = default;
-    void create(uint32_t vertex_count);
-    void copy_vertices(Vertex* vertices, uint32_t count);
-    void bind(vk::CommandBuffer& command_buffer);
-private:
-    Buffer host, local;
-    std::shared_ptr<VkContext> context;
-};
+}
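
For context, the intended use of this interface is a staging upload: write into the persistently mapped host-visible buffer, then copy into a device-local one. The helper below is a hedged sketch against the header above; MakeVertexBuffer and its data handling are illustrative, not part of this commit.

#include <cstring>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_buffer.h"

namespace Vulkan {
// Hypothetical helper: build a device-local vertex buffer filled through a
// host-visible staging buffer
VKBuffer MakeVertexBuffer(const std::vector<u8>& data) {
    const auto size = static_cast<uint32_t>(data.size());
    VKBuffer staging, vertices;
    staging.Create(size,
                   vk::MemoryPropertyFlagBits::eHostVisible |
                       vk::MemoryPropertyFlagBits::eHostCoherent,
                   vk::BufferUsageFlagBits::eTransferSrc);
    vertices.Create(size, vk::MemoryPropertyFlagBits::eDeviceLocal,
                    vk::BufferUsageFlagBits::eTransferDst |
                        vk::BufferUsageFlagBits::eVertexBuffer);
    // Host-visible buffers are mapped by Create, so write into them directly...
    std::memcpy(staging.memory, data.data(), size);
    // ...then submit a one-time copy; CopyBuffer waits for completion
    VKBuffer::CopyBuffer(staging, vertices, vk::BufferCopy{0, 0, size});
    return vertices; // staging is destroyed here, after the copy has finished
}
} // namespace Vulkan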

View File

@ -1,75 +0,0 @@
#pragma once
#include "vk_swapchain.h"
#include <string_view>
#include <unordered_map>
class VkWindow;
class VkContext;
constexpr int MAX_BINDING_COUNT = 10;
struct PipelineLayoutInfo
{
friend class VkContext;
PipelineLayoutInfo(const std::shared_ptr<VkContext>& context);
~PipelineLayoutInfo();
void add_shader_module(std::string_view filepath, vk::ShaderStageFlagBits stage);
void add_resource(Resource* resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int binding, int group = 0);
private:
using DescInfo = std::pair<std::array<Resource*, MAX_BINDING_COUNT>, std::vector<vk::DescriptorSetLayoutBinding>>;
std::shared_ptr<VkContext> context;
std::unordered_map<int, DescInfo> resource_types;
std::unordered_map<vk::DescriptorType, int> needed;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
};
class VkTexture;
// The vulkan context. Can only be created by the window
class VkContext
{
friend class VkWindow;
public:
VkContext(vk::UniqueInstance&& instance, VkWindow* window);
~VkContext();
void create(SwapchainInfo& info);
void create_graphics_pipeline(PipelineLayoutInfo& info);
vk::CommandBuffer& get_command_buffer();
private:
void create_devices(int device_id = 0);
void create_renderpass();
void create_command_buffers();
void create_decriptor_sets(PipelineLayoutInfo& info);
public:
// Queue family indexes
uint32_t queue_family = -1;
// Core vulkan objects
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;
vk::Queue graphics_queue;
// Pipeline
vk::UniquePipelineLayout pipeline_layout;
vk::UniquePipeline graphics_pipeline;
vk::UniqueRenderPass renderpass;
vk::UniqueDescriptorPool descriptor_pool;
std::array<std::vector<vk::DescriptorSetLayout>, MAX_FRAMES_IN_FLIGHT> descriptor_layouts;
std::array<std::vector<vk::DescriptorSet>, MAX_FRAMES_IN_FLIGHT> descriptor_sets;
// Command buffer
vk::UniqueCommandPool command_pool;
std::vector<vk::UniqueCommandBuffer> command_buffers;
// Window
VkWindow* window;
SwapchainInfo swapchain_info;
};

View File

@ -0,0 +1,72 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vulkan/vulkan.hpp>
#include <string>
#include <unordered_map>
#include <memory>
#include "common/common_types.h"
namespace Vulkan {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
// number but 9 swap textures at 60FPS presentation allows for 800% speed so that's probably fine
#ifdef ANDROID
// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
constexpr std::size_t SWAP_CHAIN_SIZE = 6;
#else
constexpr std::size_t SWAP_CHAIN_SIZE = 9;
#endif
/// The global Vulkan instance
class VKInstance
{
public:
VKInstance() = default;
~VKInstance();
/// Construct global Vulkan context
void Create(vk::UniqueInstance instance, vk::PhysicalDevice gpu, vk::UniqueSurfaceKHR surface,
bool enable_debug_reports, bool enable_validation_layer);
vk::Device& GetDevice() { return device.get(); }
vk::PhysicalDevice& GetPhysicalDevice() { return physical_device; }
/// Get a valid command buffer for the current frame
vk::CommandBuffer& GetCommandBuffer();
/// Feature support
bool SupportsAnisotropicFiltering() const;
private:
void CreateDevices(int device_id = 0);
void CreateRenderpass();
void CreateCommandBuffers();
public:
// Queue family indexes
u32 queue_family = -1;
// Core vulkan objects
vk::UniqueInstance instance;
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;
vk::Queue graphics_queue;
// Pipeline
vk::UniqueDescriptorPool descriptor_pool;
std::array<std::vector<vk::DescriptorSetLayout>, SWAP_CHAIN_SIZE> descriptor_layouts;
std::array<std::vector<vk::DescriptorSet>, SWAP_CHAIN_SIZE> descriptor_sets;
// Command buffer
vk::UniqueCommandPool command_pool;
std::vector<vk::UniqueCommandBuffer> command_buffers;
};
extern std::unique_ptr<VKInstance> g_vk_instace;
} // namespace Vulkan

View File

@ -0,0 +1,61 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <shaderc/shaderc.hpp>
namespace Vulkan {
shaderc::Compiler compiler;
void VKPipeline::Info::AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage)
{
shaderc_shader_kind shader_stage;
std::string name;
switch (stage)
{
case vk::ShaderStageFlagBits::eVertex:
shader_stage = shaderc_glsl_vertex_shader;
name = "Vertex shader";
break;
case vk::ShaderStageFlagBits::eCompute:
shader_stage = shaderc_glsl_compute_shader;
name = "Compute shader";
break;
case vk::ShaderStageFlagBits::eFragment:
shader_stage = shaderc_glsl_fragment_shader;
name = "Fragment shader";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown vulkan shader stage {}", stage);
UNREACHABLE();
}
shaderc::CompileOptions options;
options.SetOptimizationLevel(shaderc_optimization_level_performance);
options.SetAutoBindUniforms(true);
options.SetAutoMapLocations(true);
options.SetTargetEnvironment(shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2);
auto result = compiler.CompileGlslToSpv(source.c_str(), shader_stage, name.c_str(), options);
if (result.GetCompilationStatus() != shaderc_compilation_status_success) {
LOG_CRITICAL(Render_Vulkan, "Failed to compile GLSL shader with error: {}", result.GetErrorMessage());
UNREACHABLE();
}
auto shader_code = std::vector<uint32_t>{ result.cbegin(), result.cend() };
vk::ShaderModuleCreateInfo module_info({}, shader_code);
auto module = g_vk_instace->GetDevice().createShaderModuleUnique(module_info);
shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module.get(), "main");
// The stage info above only stores a raw handle, so keep the unique module
// alive alongside it instead of returning it from a void function
shader_modules.push_back(std::move(module));
}
} // namespace Vulkan

View File

@ -0,0 +1,49 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
using Resource = std::variant<VKBuffer, VKTexture>;
/// Vulkan pipeline objects represent a collection of shader modules
class VKPipeline final : private NonCopyable {
public:
/// Includes all required information to build a Vulkan pipeline object
class Info : private NonCopyable {
public:
    Info() = default;
    ~Info() = default;
/// Compile the GLSL source and assign the resulting module to a specific stage
void AddShaderModule(const std::string& source, vk::ShaderStageFlagBits stage);
/// Add a texture or a buffer to the target descriptor set
void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
private:
    using ResourceInfo = std::pair<std::reference_wrapper<Resource>, vk::DescriptorSetLayoutBinding>;
    std::unordered_map<int, std::vector<ResourceInfo>> descriptor_sets;
    std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
    std::vector<vk::UniqueShaderModule> shader_modules;
};
VKPipeline() = default;
~VKPipeline() = default;
/// Create a new Vulkan pipeline object
void Create(const Info& info);
void Create(vk::PipelineLayoutCreateInfo layout_info);
private:
vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout;
};
} // namespace Vulkan

View File

@ -0,0 +1,668 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "video_core/video_core.h"
#include "core/frontend/scope_acquire_context.h"
#include "video_core/renderer_vulkan/vk_pipeline_manager.h"
#include <boost/functional/hash.hpp>
#include <boost/variant.hpp>
#include <algorithm>
#include <thread>
#include <unordered_map>
namespace Vulkan {
static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) {
std::size_t hash = 0;
u64 regs_uid = Common::ComputeHash64(regs.reg_array.data(), Pica::Regs::NUM_REGS * sizeof(u32));
boost::hash_combine(hash, regs_uid);
if (code.size() > 0) {
u64 code_uid = Common::ComputeHash64(code.data(), code.size() * sizeof(u32));
boost::hash_combine(hash, code_uid);
}
return static_cast<u64>(hash);
}
static OGLProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats,
bool separable) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
return {};
}
auto shader = OGLProgram();
shader.handle = glCreateProgram();
if (separable) {
glProgramParameteri(shader.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
}
glProgramBinary(shader.handle, dump.binary_format, dump.binary.data(),
static_cast<GLsizei>(dump.binary.size()));
GLint link_status{};
glGetProgramiv(shader.handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
return {};
}
return shader;
}
static std::set<GLenum> GetSupportedFormats() {
std::set<GLenum> supported_formats;
GLint num_formats{};
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
for (const GLint format : formats)
supported_formats.insert(static_cast<GLenum>(format));
return supported_formats;
}
static std::tuple<PicaVSConfig, Pica::Shader::ShaderSetup> BuildVSConfigFromRaw(
const ShaderDiskCacheRaw& raw) {
Pica::Shader::ProgramCode program_code{};
Pica::Shader::SwizzleData swizzle_data{};
std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH,
program_code.begin());
std::copy_n(raw.GetProgramCode().begin() + Pica::Shader::MAX_PROGRAM_CODE_LENGTH,
Pica::Shader::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin());
Pica::Shader::ShaderSetup setup;
setup.program_code = program_code;
setup.swizzle_data = swizzle_data;
return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup};
}
static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
return;
}
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! Got {}, expected {}",
static_cast<int>(ub_size), expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
static void SetShaderUniformBlockBindings(GLuint shader) {
SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
sizeof(UniformData));
SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, sizeof(VSUniformData));
}
static void SetShaderSamplerBinding(GLuint shader, const char* name,
TextureUnits::TextureUnit binding) {
GLint uniform_tex = glGetUniformLocation(shader, name);
if (uniform_tex != -1) {
glUniform1i(uniform_tex, binding.id);
}
}
static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) {
GLint uniform_tex = glGetUniformLocation(shader, name);
if (uniform_tex != -1) {
glUniform1i(uniform_tex, static_cast<GLint>(binding));
}
}
static void SetShaderSamplerBindings(GLuint shader) {
OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
cur_state.Apply();
// Set the texture samplers to correspond to different texture units
SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0));
SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1));
SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2));
SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
// Set the texture samplers to correspond to different lookup table texture units
SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA);
SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer);
SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX);
SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX);
SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY);
SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY);
SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ);
SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ);
cur_state.draw.shader_program = old_program;
cur_state.Apply();
}
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs,
const Pica::Shader::ShaderSetup& setup) {
std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools),
[](bool value) -> BoolAligned { return {value ? GL_TRUE : GL_FALSE}; });
std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i),
[](const auto& value) -> GLuvec4 {
return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()};
});
std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f),
[](const auto& value) -> GLvec4 {
return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(),
value.w.ToFloat32()};
});
}
template <typename KeyConfigType,
ShaderDecompiler::ProgramResult (*CodeGenerator)(const KeyConfigType&, bool),
GLenum ShaderType>
class ShaderCache {
public:
explicit ShaderCache(bool separable) : separable(separable) {}
std::tuple<GLuint, std::optional<ShaderDecompiler::ProgramResult>> Get(
const KeyConfigType& config) {
auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable});
OGLShaderStage& cached_shader = iter->second;
std::optional<ShaderDecompiler::ProgramResult> result{};
if (new_shader) {
result = CodeGenerator(config, separable);
cached_shader.Create(result->code.c_str(), ShaderType);
}
return {cached_shader.GetHandle(), std::move(result)};
}
void Inject(const KeyConfigType& key, OGLProgram&& program) {
OGLShaderStage stage{separable};
stage.Inject(std::move(program));
shaders.emplace(key, std::move(stage));
}
void Inject(const KeyConfigType& key, OGLShaderStage&& stage) {
shaders.emplace(key, std::move(stage));
}
private:
bool separable;
std::unordered_map<KeyConfigType, OGLShaderStage> shaders;
};
// This is a cache designed for shaders translated from PICA shaders. The first cache matches the
// config structure like a normal cache does. On cache miss, the second cache matches the generated
// GLSL code. The configuration is like this because there might be leftover code in the PICA shader
// program buffer from the previous shader, which is hashed into the config, resulting in
// several different config values from the same shader program.
template <typename KeyConfigType,
std::optional<ShaderDecompiler::ProgramResult> (*CodeGenerator)(
const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool),
GLenum ShaderType>
class ShaderDoubleCache {
public:
explicit ShaderDoubleCache(bool separable) : separable(separable) {}
std::tuple<GLuint, std::optional<ShaderDecompiler::ProgramResult>> Get(
const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) {
std::optional<ShaderDecompiler::ProgramResult> result{};
auto map_it = shader_map.find(key);
if (map_it == shader_map.end()) {
auto program_opt = CodeGenerator(setup, key, separable);
if (!program_opt) {
shader_map[key] = nullptr;
return {0, std::nullopt};
}
std::string& program = program_opt->code;
auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable});
OGLShaderStage& cached_shader = iter->second;
if (new_shader) {
result.emplace();
result->code = program;
cached_shader.Create(program.c_str(), ShaderType);
}
shader_map[key] = &cached_shader;
return {cached_shader.GetHandle(), std::move(result)};
}
if (map_it->second == nullptr) {
return {0, std::nullopt};
}
return {map_it->second->GetHandle(), std::nullopt};
}
void Inject(const KeyConfigType& key, std::string decomp, OGLProgram&& program) {
OGLShaderStage stage{separable};
stage.Inject(std::move(program));
const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first;
OGLShaderStage& cached_shader = iter->second;
shader_map.insert_or_assign(key, &cached_shader);
}
void Inject(const KeyConfigType& key, std::string decomp, OGLShaderStage&& stage) {
const auto iter = shader_cache.emplace(std::move(decomp), std::move(stage)).first;
OGLShaderStage& cached_shader = iter->second;
shader_map.insert_or_assign(key, &cached_shader);
}
private:
bool separable;
std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
std::unordered_map<std::string, OGLShaderStage> shader_cache;
};
using ProgrammableVertexShaders =
ShaderDoubleCache<PicaVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
using FixedGeometryShaders =
ShaderCache<PicaFixedGSConfig, &GenerateFixedGeometryShader, GL_GEOMETRY_SHADER>;
using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
class ShaderProgramManager::Impl {
public:
explicit Impl(bool separable, bool is_amd)
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) {
if (separable)
pipeline.Create();
}
struct ShaderTuple {
GLuint vs = 0;
GLuint gs = 0;
GLuint fs = 0;
std::size_t vs_hash = 0;
std::size_t gs_hash = 0;
std::size_t fs_hash = 0;
bool operator==(const ShaderTuple& rhs) const {
return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
}
bool operator!=(const ShaderTuple& rhs) const {
return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs);
}
std::size_t GetConfigHash() const {
std::size_t hash = 0;
boost::hash_combine(hash, vs_hash);
boost::hash_combine(hash, gs_hash);
boost::hash_combine(hash, fs_hash);
return hash;
}
};
bool is_amd;
bool separable;
ShaderTuple current;
ProgrammableVertexShaders programmable_vertex_shaders;
TrivialVertexShader trivial_vertex_shader;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
std::unordered_map<u64, OGLProgram> program_cache;
OGLPipeline pipeline;
ShaderDiskCache disk_cache;
};
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
bool is_amd)
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::~ShaderProgramManager() = default;
bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup) {
PicaVSConfig config{regs.vs, setup};
auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup);
if (handle == 0)
return false;
impl->current.vs = handle;
impl->current.vs_hash = config.Hash();
// Save VS to the disk cache if it's a new shader
if (result) {
auto& disk_cache = impl->disk_cache;
ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()};
program_code.insert(program_code.end(), setup.swizzle_data.begin(),
setup.swizzle_data.end());
const u64 unique_identifier = GetUniqueIdentifier(regs, program_code);
const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs,
std::move(program_code)};
disk_cache.SaveRaw(raw);
disk_cache.SaveDecompiled(unique_identifier, *result, VideoCore::g_hw_shader_accurate_mul);
}
return true;
}
void ShaderProgramManager::UseTrivialVertexShader() {
impl->current.vs = impl->trivial_vertex_shader.Get();
impl->current.vs_hash = 0;
}
void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
PicaFixedGSConfig gs_config(regs);
auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
impl->current.gs = handle;
impl->current.gs_hash = gs_config.Hash();
}
void ShaderProgramManager::UseTrivialGeometryShader() {
impl->current.gs = 0;
impl->current.gs_hash = 0;
}
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
auto [handle, result] = impl->fragment_shaders.Get(config);
impl->current.fs = handle;
impl->current.fs_hash = config.Hash();
// Save FS to the disk cache if it's a new shader
if (result) {
auto& disk_cache = impl->disk_cache;
u64 unique_identifier = GetUniqueIdentifier(regs, {});
ShaderDiskCacheRaw raw{unique_identifier, ProgramType::FS, regs, {}};
disk_cache.SaveRaw(raw);
disk_cache.SaveDecompiled(unique_identifier, *result, false);
}
}
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) {
if (impl->is_amd) {
// Without this resetting, AMD sometimes freezes when one stage is changed but not
// the others. On the other hand, including this reset seems to introduce a memory
// leak on Intel graphics.
glUseProgramStages(
impl->pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
}
glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs);
glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs);
glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs);
state.draw.shader_program = 0;
state.draw.program_pipeline = impl->pipeline.handle;
} else {
const u64 unique_identifier = impl->current.GetConfigHash();
OGLProgram& cached_program = impl->program_cache[unique_identifier];
if (cached_program.handle == 0) {
cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs});
auto& disk_cache = impl->disk_cache;
disk_cache.SaveDumpToFile(unique_identifier, cached_program.handle,
VideoCore::g_hw_shader_accurate_mul);
SetShaderUniformBlockBindings(cached_program.handle);
SetShaderSamplerBindings(cached_program.handle);
}
state.draw.shader_program = cached_program.handle;
}
}
void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
if (!GLAD_GL_ARB_get_program_binary && !GLES) {
LOG_ERROR(Render_OpenGL,
"Cannot load disk cache as ARB_get_program_binary is not supported!");
return;
}
auto& disk_cache = impl->disk_cache;
const auto transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
const auto& raws = *transferable;
// Load uncompressed precompiled file for non-separable shaders.
// Precompiled file for separable shaders is compressed.
auto [decompiled, dumps] = disk_cache.LoadPrecompiled(impl->separable);
if (stop_loading) {
return;
}
std::set<GLenum> supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
std::mutex mutex;
std::atomic_bool compilation_failed = false;
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
std::vector<std::size_t> load_raws_index;
// Loads both decompiled and precompiled shaders from the cache. If either one is missing for
const auto LoadPrecompiledShader = [&](std::size_t begin, std::size_t end,
const std::vector<ShaderDiskCacheRaw>& raw_cache,
const ShaderDecompiledMap& decompiled_map,
const ShaderDumpsMap& dump_map) {
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;
}
const auto& raw{raw_cache[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash =
GetUniqueIdentifier(raw.GetRawShaderConfig(), raw.GetProgramCode());
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - removing "
"shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateAll();
return;
}
const auto dump{dump_map.find(unique_identifier)};
const auto decomp{decompiled_map.find(unique_identifier)};
OGLProgram shader;
if (dump != dump_map.end() && decomp != decompiled_map.end()) {
// Only load the vertex shader if its sanitize_mul setting matches
if (raw.GetProgramType() == ProgramType::VS &&
decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
continue;
}
// If the shader is dumped, attempt to load it
shader =
GeneratePrecompiledProgram(dump->second, supported_formats, impl->separable);
if (shader.handle == 0) {
// If any shader failed, stop trying to compile, delete the cache, and start
// loading from raws
compilation_failed = true;
return;
}
// we have both the binary shader and the decompiled, so inject it into the
// cache
if (raw.GetProgramType() == ProgramType::VS) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code,
std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(shader));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_CRITICAL(Frontend, "failed to load raw ProgramType {}",
raw.GetProgramType());
compilation_failed = true;
return;
}
} else {
// Since precompiled didn't have the dump, we'll load them in the next phase
std::scoped_lock lock(mutex);
load_raws_index.push_back(i);
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raw_cache.size());
}
}
};
const auto LoadPrecompiledProgram = [&](const ShaderDecompiledMap& decompiled_map,
const ShaderDumpsMap& dump_map) {
std::size_t i{0};
for (const auto& dump : dump_map) {
if (stop_loading) {
break;
}
const u64 unique_identifier{dump.first};
const auto decomp{decompiled_map.find(unique_identifier)};
// Only load the program if its sanitize_mul setting matches
if (decomp->second.sanitize_mul != VideoCore::g_hw_shader_accurate_mul) {
continue;
}
// If the shader program is dumped, attempt to load it
OGLProgram shader =
GeneratePrecompiledProgram(dump.second, supported_formats, impl->separable);
if (shader.handle != 0) {
SetShaderUniformBlockBindings(shader.handle);
SetShaderSamplerBindings(shader.handle);
impl->program_cache.emplace(unique_identifier, std::move(shader));
} else {
LOG_ERROR(Frontend, "Failed to link Precompiled program!");
compilation_failed = true;
break;
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, ++i, dump_map.size());
}
}
};
if (impl->separable) {
LoadPrecompiledShader(0, raws.size(), raws, decompiled, dumps);
} else {
LoadPrecompiledProgram(decompiled, dumps);
}
bool load_all_raws = false;
if (compilation_failed) {
// Invalidate the precompiled cache if a dumped shader was rejected
impl->program_cache.clear();
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
load_all_raws = true;
}
// TODO(SachinV): Skip loading raws until we implement a proper way to link non-separable
// shaders.
if (!impl->separable) {
return;
}
const std::size_t load_raws_size = load_all_raws ? raws.size() : load_raws_index.size();
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, load_raws_size);
}
compilation_failed = false;
std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
const auto LoadRawSeparable = [&](Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
Frontend::ScopeAcquireContext scope(*context);
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;
}
const std::size_t raws_index = load_all_raws ? i : load_raws_index[i];
const auto& raw{raws[raws_index]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
bool sanitize_mul = false;
GLuint handle{0};
std::optional<ShaderDecompiler::ProgramResult> result;
// Otherwise decompile and build the shader at boot and save the result to the
// precompiled file
if (raw.GetProgramType() == ProgramType::VS) {
auto [conf, setup] = BuildVSConfigFromRaw(raw);
result = GenerateVertexShader(setup, conf, impl->separable);
OGLShaderStage stage{impl->separable};
stage.Create(result->code.c_str(), GL_VERTEX_SHADER);
handle = stage.GetHandle();
sanitize_mul = conf.state.sanitize_mul;
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig());
result = GenerateFragmentShader(conf, impl->separable);
OGLShaderStage stage{impl->separable};
stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER);
handle = stage.GetHandle();
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(stage));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType());
compilation_failed = true;
return;
}
if (handle == 0) {
LOG_ERROR(Frontend, "compilation from raw failed {:x} {:x}",
raw.GetProgramCode().at(0), raw.GetProgramCode().at(1));
compilation_failed = true;
return;
}
std::scoped_lock lock(mutex);
// If this is a new separable shader, add it to the precompiled cache
if (result) {
disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul);
disk_cache.SaveDump(unique_identifier, handle);
precompiled_cache_altered = true;
}
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, load_raws_size);
}
}
};
const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
const std::size_t bucket_size{load_raws_size / num_workers};
std::vector<std::unique_ptr<Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? load_raws_size : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(LoadRawSeparable, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
disk_cache.InvalidateAll();
}
if (precompiled_cache_altered) {
disk_cache.SaveVirtualPrecompiledFile();
}
}
} // namespace Vulkan

View File

@ -0,0 +1,131 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <variant>
#include <vulkan/vulkan.hpp>
#include <glm/glm.hpp>
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_lighting.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Core {
class System;
}
namespace Vulkan {
enum class UniformBindings : u32 { Common, VS, GS };
struct LightSrc {
alignas(16) glm::vec3 specular_0;
alignas(16) glm::vec3 specular_1;
alignas(16) glm::vec3 diffuse;
alignas(16) glm::vec3 ambient;
alignas(16) glm::vec3 position;
alignas(16) glm::vec3 spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
alignas(16) glm::ivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
alignas(16) glm::vec3 fog_color;
alignas(8) glm::vec2 proctex_noise_f;
alignas(8) glm::vec2 proctex_noise_a;
alignas(8) glm::vec2 proctex_noise_p;
alignas(16) glm::vec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) glm::vec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) glm::vec4 tev_combiner_buffer_color;
alignas(16) glm::vec4 clip_coef;
};
static_assert(sizeof(UniformData) == 0x4F0, "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the Vulkan spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
struct BoolAligned {
alignas(16) int b;
};
std::array<BoolAligned, 16> bools;
alignas(16) std::array<glm::uvec4, 4> i;
alignas(16) std::array<glm::vec4, 96> f;
};
struct VSUniformData {
PicaUniformsData uniforms;
};
static_assert(sizeof(VSUniformData) == 1856, "The size of the VSUniformData structure has changed, update the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384, "VSUniformData structure must be less than 16kb as per the Vulkan spec");
using Resource = std::variant<VKBuffer, VKTexture>;
/// Includes all required information to build a Vulkan pipeline object
class VKPipelineInfo : private NonCopyable {
public:
    VKPipelineInfo() = default;
    ~VKPipelineInfo() = default;
/// Assign a shader module to a specific stage
void AddShaderModule(const vk::ShaderModule& module, vk::ShaderStageFlagBits stage);
/// Add a texture or a buffer to the target descriptor set
void AddResource(const Resource& resource, vk::DescriptorType type, vk::ShaderStageFlags stages, int set = 0);
private:
using ResourceInfo = std::pair<std::reference_wrapper<Resource>, vk::DescriptorSetLayoutBinding>;
std::unordered_map<int, std::vector<ResourceInfo>> descriptor_sets;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
};
/// A class that manages the creation and storage of Vulkan pipeline objects.
class PipelineManager {
public:
PipelineManager(Frontend::EmuWindow& emu_window);
~PipelineManager();
/// Retrieves the Vulkan pipeline that maps to the current PICA state.
/// If not present, it is compiled and cached
vk::Pipeline GetPipeline(const Pica::Regs& config, Pica::Shader::ShaderSetup& setup);
private:
std::unordered_map<VKPipelineCacheKey, vk::UniquePipeline> pipelines;
vk::UniquePipelineCache pipeline_cache;
Frontend::EmuWindow& emu_window;
};
} // namespace Vulkan
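
GetPipeline as declared above would presumably follow the usual hash-and-cache pattern over the pipelines map. The sketch below is under assumed names: MakeKey and CreatePipeline are hypothetical, and a std::hash specialization for VKPipelineCacheKey is assumed to exist.

// Illustrative only: look the key up in the map, build and insert on a miss
vk::Pipeline PipelineManager::GetPipeline(const Pica::Regs& config,
                                          Pica::Shader::ShaderSetup& setup) {
    const VKPipelineCacheKey key = MakeKey(config, setup); // hypothetical
    if (const auto it = pipelines.find(key); it != pipelines.end()) {
        return it->second.get();
    }
    // Cache miss: compile the pipeline, optionally through the driver-side
    // vk::PipelineCache so repeated runs are cheaper, and remember it
    auto pipeline = CreatePipeline(key, pipeline_cache.get()); // hypothetical
    return pipelines.emplace(key, std::move(pipeline)).first->second.get();
}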

File diff suppressed because it is too large

View File

@ -0,0 +1,338 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>
#include <vulkan/vulkan.hpp>
#include <glm/glm.hpp>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader.h"
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
class ShaderProgramManager;
class RasterizerVulkan : public VideoCore::RasterizerInterface {
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
void ClearAll(bool flush) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, OpenGL::ScreenInfo& screen_info) override;
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers
void SyncEntireState() override;
private:
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
OGLSampler sampler;
/// Creates the sampler object, initializing its state so that it's in sync with the
/// SamplerInfo struct.
void Create();
/// Syncs the sampler object with the config, updating any necessary state.
void SyncWithConfig(const TextureConfig& config);
private:
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::TextureFilter mip_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color;
u32 lod_min;
u32 lod_max;
s32 lod_bias;
// TODO(wwylele): remove this once mipmap for cube is implemented
bool supress_mipmap_for_cube = false;
};
struct VertexInfo
{
VertexInfo() = default;
VertexInfo(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
normquat = -normquat;
}
}
glm::vec4 position;
glm::vec4 color;
glm::vec2 tex_coord0;
glm::vec2 tex_coord1;
glm::vec2 tex_coord2;
float tex_coord0_w;
glm::vec4 normquat;
glm::vec3 view;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex : public VertexInfo
{
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexInfo(v, flip_quaternion) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
static constexpr std::array<vk::VertexInputAttributeDescription, 8> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, color)),
vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord0)),
vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord1)),
vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord2)),
vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexInfo, tex_coord0_w)),
vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, normquat)),
vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, view)),
};
};
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
bool Draw(bool accelerate, bool is_indexed);
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
bool is_amd;
OpenGLState state;
GLuint default_texture;
RasterizerCacheOpenGL res_cache;
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
struct {
UniformData data;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
bool lighting_lut_dirty_any;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
bool proctex_alpha_map_dirty;
bool proctex_lut_dirty;
bool proctex_diff_lut_dirty;
bool dirty;
} uniform_block_data = {};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
std::array<SamplerInfo, 3> texture_samplers;
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;
OGLStreamBuffer index_buffer;
OGLStreamBuffer texture_buffer;
OGLStreamBuffer texture_lf_buffer;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
SamplerInfo texture_cube_sampler;
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
std::array<GLvec2, 128> fog_lut_data{};
std::array<GLvec2, 128> proctex_noise_lut_data{};
std::array<GLvec2, 128> proctex_color_map_data{};
std::array<GLvec2, 128> proctex_alpha_map_data{};
std::array<GLvec4, 256> proctex_lut_data{};
std::array<GLvec4, 256> proctex_diff_lut_data{};
bool allow_shadow;
};
} // namespace Vulkan

File diff suppressed because it is too large

View File

@ -0,0 +1,371 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <list>
#include <memory>
#include <mutex>
#include <set>
#include <tuple>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
#endif
#include <boost/icl/interval_map.hpp>
#include <boost/icl/interval_set.hpp>
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#include <unordered_map>
#include <vulkan/vulkan.hpp>
#include <boost/functional/hash.hpp>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/custom_tex_cache.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/texture/texture_decode.h"
namespace Vulkan {
class RasterizerCacheVulkan;
class TextureFilterer;
class FormatReinterpreterVulkan;
const vk::Format& GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
struct HostTextureTag {
vk::Format format;
u32 width;
u32 height;
bool operator==(const HostTextureTag& rhs) const noexcept {
return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height);
};
};
struct TextureCubeConfig {
PAddr px;
PAddr nx;
PAddr py;
PAddr ny;
PAddr pz;
PAddr nz;
u32 width;
Pica::TexturingRegs::TextureFormat format;
bool operator==(const TextureCubeConfig& rhs) const {
return std::tie(px, nx, py, ny, pz, nz, width, format) ==
std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format);
}
bool operator!=(const TextureCubeConfig& rhs) const {
return !(*this == rhs);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::HostTextureTag> {
std::size_t operator()(const Vulkan::HostTextureTag& tag) const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, tag.format);
boost::hash_combine(hash, tag.width);
boost::hash_combine(hash, tag.height);
return hash;
}
};
template <>
struct hash<Vulkan::TextureCubeConfig> {
std::size_t operator()(const Vulkan::TextureCubeConfig& config) const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, config.px);
boost::hash_combine(hash, config.nx);
boost::hash_combine(hash, config.py);
boost::hash_combine(hash, config.ny);
boost::hash_combine(hash, config.pz);
boost::hash_combine(hash, config.nz);
boost::hash_combine(hash, config.width);
boost::hash_combine(hash, static_cast<u32>(config.format));
return hash;
}
};
} // namespace std
namespace Vulkan {
using SurfaceSet = std::set<Surface>;
using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterval>;
using SurfaceMap =
boost::icl::interval_map<PAddr, Surface, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
using SurfaceCache =
boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
"incorrect interval types");
using SurfaceRect_Tuple = std::tuple<Surface, Common::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
Ignore // accept every scaled res
};
/**
* A watcher that notifies whether a cached surface has been changed. This is useful for caching
* surface collection objects, including texture cube and mipmap.
*/
struct SurfaceWatcher {
public:
explicit SurfaceWatcher(std::weak_ptr<CachedSurface>&& surface) : surface(std::move(surface)) {}
/**
* Checks whether the surface has been changed.
* @return false if the surface content has been changed since last Validate() call or has been
* destroyed; otherwise true
*/
bool IsValid() const {
return !surface.expired() && valid;
}
/// Marks that the content of the referencing surface has been updated to the watcher user.
void Validate() {
ASSERT(!surface.expired());
valid = true;
}
/// Gets the referencing surface. Returns null if the surface has been destroyed
Surface Get() const {
return surface.lock();
}
private:
friend struct CachedSurface;
std::weak_ptr<CachedSurface> surface;
bool valid = false;
};
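// Illustrative sketch (hypothetical names): a cached collection entry such as a
// texture cube face would typically be kept up to date through its watcher:
//
//   std::shared_ptr<SurfaceWatcher> watcher = face_surface->CreateWatcher();
//   if (!watcher->IsValid()) {
//       // Re-copy the face from the source surface, then mark it as seen.
//       watcher->Validate();
//   }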
class RasterizerCacheVulkan;
struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
CachedSurface(RasterizerCacheVulkan& owner) : owner{owner} {}
~CachedSurface();
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
}
bool IsSurfaceFullyInvalid() const {
auto interval = GetInterval();
return *invalid_regions.equal_range(interval).first == interval;
}
bool registered = false;
SurfaceRegions invalid_regions;
u32 fill_size = 0; /// Number of bytes to read from fill_data
std::array<u8, 4> fill_data;
VKTexture texture;
/// max mipmap level that has been attached to the texture
u32 max_level = 0;
/// level_watchers[i] watches the (i+1)-th level mipmap source surface
std::array<std::shared_ptr<SurfaceWatcher>, 7> level_watchers;
bool is_custom = false;
Core::CustomTexInfo custom_tex_info;
static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
return format == PixelFormat::Invalid
? 0
: (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture)
? 4
: SurfaceParams::GetFormatBpp(format) / 8;
}
std::vector<u8> vk_buffer;
// Read/Write data in 3DS memory to/from vk_buffer
void LoadGLBuffer(PAddr load_start, PAddr load_end);
void FlushGLBuffer(PAddr flush_start, PAddr flush_end);
// Custom texture loading and dumping
bool LoadCustomTexture(u64 tex_hash);
void DumpTexture(VKTexture& target_tex, u64 tex_hash);
// Upload/Download data in vk_buffer to/from this surface's texture
void UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_handle, GLuint draw_fb_handle);
void DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
GLuint draw_fb_handle);
std::shared_ptr<SurfaceWatcher> CreateWatcher() {
auto watcher = std::make_shared<SurfaceWatcher>(weak_from_this());
watchers.push_front(watcher);
return watcher;
}
void InvalidateAllWatcher() {
for (const auto& watcher : watchers) {
if (auto locked = watcher.lock()) {
locked->valid = false;
}
}
}
void UnlinkAllWatcher() {
for (const auto& watcher : watchers) {
if (auto locked = watcher.lock()) {
locked->valid = false;
locked->surface.reset();
}
}
watchers.clear();
}
private:
RasterizerCacheVulkan& owner;
std::list<std::weak_ptr<SurfaceWatcher>> watchers;
};
struct CachedTextureCube {
VKTexture texture;
u16 res_scale = 1;
std::shared_ptr<SurfaceWatcher> px;
std::shared_ptr<SurfaceWatcher> nx;
std::shared_ptr<SurfaceWatcher> py;
std::shared_ptr<SurfaceWatcher> ny;
std::shared_ptr<SurfaceWatcher> pz;
std::shared_ptr<SurfaceWatcher> nz;
};
class TextureDownloader;
class RasterizerCacheVulkan : NonCopyable {
public:
RasterizerCacheVulkan();
~RasterizerCacheVulkan();
/// Blit one surface's texture to another
bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle<u32>& src_rect,
const Surface& dst_surface, const Common::Rectangle<u32>& dst_rect);
/// Copy one surface's region to another
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
SurfaceInterval copy_interval);
/// Load a texture from 3DS memory to OpenGL and cache it (if not already cached)
Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0);
/// Get a texture cube based on the texture configuration
const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config);
/// Get the color and depth surfaces based on the framebuffer configuration
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const Common::Rectangle<s32>& viewport_rect);
/// Get a surface that matches the fill config
Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr);
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner);
/// Flush all cached resources tracked by this cache manager
void FlushAll();
/// Clear all cached resources tracked by this cache manager
void ClearAll(bool flush);
// Textures from destroyed surfaces are stored here to be recycled, reducing
// allocation overhead in the driver.
// This must be placed above surface_cache to ensure all cached surfaces are
// destroyed before the recycler.
std::unordered_multimap<HostTextureTag, VKTexture> host_texture_recycler;
private:
void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
/// Update surface's texture for given region when necessary
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
// Returns false if there is a surface in the cache at the interval with the same bit-width
bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
Vulkan::SurfaceParams& params,
const Vulkan::SurfaceInterval& interval);
// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
const SurfaceInterval& interval);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
/// Remove surface from the cache
void UnregisterSurface(const Surface& surface);
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
SurfaceCache surface_cache;
PageMap cached_pages;
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
VKFramebuffer read_framebuffer;
VKFramebuffer draw_framebuffer;
u16 resolution_scale_factor;
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
std::recursive_mutex mutex;
public:
VKTexture AllocateSurfaceTexture(vk::Format format, u32 width, u32 height);
std::unique_ptr<TextureFilterer> texture_filterer;
std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
std::unique_ptr<TextureDownloader> texture_downloader_es;
};
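// Sketch of how AllocateSurfaceTexture is expected to use the recycler above
// (an assumption mirroring the OpenGL cache; the Create() parameters are
// illustrative):
//
//   VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format,
//                                                           u32 width, u32 height) {
//       const HostTextureTag key = {format, width, height};
//       if (auto it = host_texture_recycler.find(key); it != host_texture_recycler.end()) {
//           VKTexture texture = std::move(it->second);
//           host_texture_recycler.erase(it);
//           return texture;
//       }
//       VKTexture texture;
//       texture.Create({width, height, format, vk::ImageType::e2D, vk::ImageViewType::e2D});
//       return texture;
//   }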
} // namespace Vulkan

View File

@ -0,0 +1,164 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_resource_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <algorithm>
#include <array>
#include <type_traits>
namespace Vulkan {
VKResourceCache::~VKResourceCache()
{
for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) {
g_vk_instace->GetDevice().destroyDescriptorSetLayout(descriptor_layouts[i]);
}
}
bool VKResourceCache::Initialize()
{
// Define the descriptor sets we will be using
std::array<vk::DescriptorSetLayoutBinding, 2> ubo_set = {{
{ 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry | vk::ShaderStageFlagBits::eFragment }, // shader_data
{ 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex } // pica_uniforms
}};
std::array<vk::DescriptorSetLayoutBinding, 4> texture_set = {{
{ 0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex0
{ 1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex1
{ 2, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex2
{ 3, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eFragment }, // tex_cube
}};
std::array<vk::DescriptorSetLayoutBinding, 3> lut_set = {{
{ 0, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_lf
{ 1, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // texture_buffer_lut_rg
{ 2, vk::DescriptorType::eStorageTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment } // texture_buffer_lut_rgba
}};
// Create and store descriptor set layouts
std::array<vk::DescriptorSetLayoutCreateInfo, DESCRIPTOR_SET_LAYOUT_COUNT> create_infos = {{
{ vk::DescriptorSetLayoutCreateFlags(), ubo_set },
{ vk::DescriptorSetLayoutCreateFlags(), texture_set },
{ vk::DescriptorSetLayoutCreateFlags(), lut_set }
}};
for (int i = 0; i < DESCRIPTOR_SET_LAYOUT_COUNT; i++) {
descriptor_layouts[i] = g_vk_instace->GetDevice().createDescriptorSetLayout(create_infos[i]);
}
// Create the standard pipeline layout
vk::PipelineLayoutCreateInfo layout_info({}, descriptor_layouts);
pipeline_layout = g_vk_instace->GetDevice().createPipelineLayoutUnique(layout_info);
if (!CreateStaticSamplers())
return false;
// Create global texture staging buffer
texture_upload_buffer.Create(MAX_TEXTURE_UPLOAD_BUFFER_SIZE,
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::BufferUsageFlagBits::eTransferSrc);
return true;
}
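// Sketch (assumption): with these layouts, a descriptor set for set = 1 (the
// texture set) would be allocated and written roughly as follows; "pool",
// "sampler" and "image_view" are hypothetical:
//
//   vk::DescriptorSetAllocateInfo alloc(pool, 1, &descriptor_layouts[1]);
//   vk::DescriptorSet set = g_vk_instace->GetDevice().allocateDescriptorSets(alloc)[0];
//   vk::DescriptorImageInfo image(sampler, image_view, vk::ImageLayout::eShaderReadOnlyOptimal);
//   vk::WriteDescriptorSet write(set, 0, 0, 1, vk::DescriptorType::eSampledImage, &image);
//   g_vk_instace->GetDevice().updateDescriptorSets(write, {});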
vk::Sampler VKResourceCache::GetSampler(const SamplerInfo& info)
{
auto iter = sampler_cache.find(info);
if (iter != sampler_cache.end()) {
return iter->second.get();
}
// Create texture sampler
auto properties = g_vk_instace->GetPhysicalDevice().getProperties();
auto features = g_vk_instace->GetPhysicalDevice().getFeatures();
vk::SamplerCreateInfo sampler_info
(
{},
info.mag_filter,
info.min_filter,
info.mipmap_mode,
info.wrapping[0], info.wrapping[1], info.wrapping[2],
{},
features.samplerAnisotropy,
properties.limits.maxSamplerAnisotropy,
false,
vk::CompareOp::eAlways,
{},
{},
vk::BorderColor::eFloatTransparentBlack,
false
);
auto sampler = g_vk_instace->GetDevice().createSamplerUnique(sampler_info);
vk::Sampler handle = sampler.get();
// Store the sampler even if creation failed so we don't retry every lookup
sampler_cache.emplace(info, std::move(sampler));
return handle;
}
vk::RenderPass VKResourceCache::GetRenderPass(vk::Format color_format, vk::Format depth_format,
u32 multisamples, vk::AttachmentLoadOp load_op)
{
auto key = std::tie(color_format, depth_format, multisamples, load_op);
auto it = render_pass_cache.find(key);
if (it != render_pass_cache.end()) {
return it->second.get();
}
vk::SubpassDescription subpass({}, vk::PipelineBindPoint::eGraphics);
std::array<vk::AttachmentDescription, 2> attachments;
std::array<vk::AttachmentReference, 2> references;
u32 index = 0;
if (color_format != vk::Format::eUndefined) {
references[index] = vk::AttachmentReference{index, vk::ImageLayout::eColorAttachmentOptimal};
attachments[index] =
{
{},
color_format,
static_cast<vk::SampleCountFlagBits>(multisamples),
load_op,
vk::AttachmentStoreOp::eStore,
vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare,
vk::ImageLayout::eColorAttachmentOptimal,
vk::ImageLayout::eColorAttachmentOptimal
};
subpass.setColorAttachmentCount(1);
subpass.setPColorAttachments(&references[index++]);
}
if (depth_format != vk::Format::eUndefined) {
references[index] = vk::AttachmentReference{index, vk::ImageLayout::eDepthStencilAttachmentOptimal};
attachments[index] =
{
{},
depth_format,
static_cast<vk::SampleCountFlagBits>(multisamples),
load_op,
vk::AttachmentStoreOp::eStore,
vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare,
vk::ImageLayout::eDepthStencilAttachmentOptimal,
vk::ImageLayout::eDepthStencilAttachmentOptimal
};
subpass.setPDepthStencilAttachment(&references[index++]);
}
std::array<vk::SubpassDescription, 1> subpasses = { subpass };
vk::RenderPassCreateInfo renderpass_info({}, attachments, subpasses);
auto renderpass = g_vk_instace->GetDevice().createRenderPassUnique(renderpass_info);
vk::RenderPass handle = renderpass.get();
render_pass_cache.emplace(key, std::move(renderpass));
return handle;
}
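// Usage sketch (formats and load op illustrative): rather than creating Vulkan
// objects directly, callers fetch cached handles from the global cache:
//
//   vk::Sampler sampler = g_object_cache->GetSampler(sampler_info);
//   vk::RenderPass pass = g_object_cache->GetRenderPass(
//       vk::Format::eR8G8B8A8Unorm, vk::Format::eD24UnormS8Uint, 1,
//       vk::AttachmentLoadOp::eLoad);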
} // namespace Vulkan

View File

@ -0,0 +1,58 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
using RenderPassCacheKey = std::tuple<vk::Format, vk::Format, u32, vk::AttachmentLoadOp>;
constexpr u32 MAX_TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u32 DESCRIPTOR_SET_LAYOUT_COUNT = 3;
// Hash functors so the caches below can use std::unordered_map with
// tuple/struct keys
struct RenderPassCacheKeyHash {
std::size_t operator()(const RenderPassCacheKey& key) const noexcept {
std::size_t hash = 0;
boost::hash_combine(hash, static_cast<u32>(std::get<0>(key)));
boost::hash_combine(hash, static_cast<u32>(std::get<1>(key)));
boost::hash_combine(hash, std::get<2>(key));
boost::hash_combine(hash, static_cast<u32>(std::get<3>(key)));
return hash;
}
};
struct SamplerInfoHash {
std::size_t operator()(const SamplerInfo& info) const noexcept {
std::size_t hash = 0;
for (const auto mode : info.wrapping) {
boost::hash_combine(hash, static_cast<u32>(mode));
}
boost::hash_combine(hash, static_cast<u32>(info.min_filter));
boost::hash_combine(hash, static_cast<u32>(info.mag_filter));
boost::hash_combine(hash, static_cast<u32>(info.mipmap_mode));
return hash;
}
};
class VKResourceCache
{
public:
VKResourceCache() = default;
~VKResourceCache();
// Called at startup; creates descriptor layouts and compiles all static shaders.
bool Initialize();
void Shutdown();
// Public interface.
VKBuffer& GetTextureUploadBuffer() { return texture_upload_buffer; }
vk::Sampler GetSampler(const SamplerInfo& info);
vk::RenderPass GetRenderPass(vk::Format color_format, vk::Format depth_format, u32 multisamples, vk::AttachmentLoadOp load_op);
vk::PipelineCache GetPipelineCache() const { return pipeline_cache.get(); }
private:
bool CreateStaticSamplers();
// Dummy image for samplers that are unbound
VKTexture dummy_texture;
VKBuffer texture_upload_buffer;
// Descriptor sets
std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_LAYOUT_COUNT> descriptor_layouts;
vk::UniquePipelineLayout pipeline_layout;
// Render pass cache
std::unordered_map<RenderPassCacheKey, vk::UniqueRenderPass, RenderPassCacheKeyHash> render_pass_cache;
std::unordered_map<SamplerInfo, vk::UniqueSampler, SamplerInfoHash> sampler_cache;
vk::UniquePipelineCache pipeline_cache;
std::string pipeline_cache_filename;
};
extern std::unique_ptr<VKResourceCache> g_object_cache;
} // namespace Vulkan

View File

@ -1,246 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
void OGLRenderbuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenRenderbuffers(1, &handle);
}
void OGLRenderbuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteRenderbuffers(1, &handle);
OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply();
handle = 0;
}
void OGLTexture::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTexture::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
OpenGLState::GetCurState().ResetTexture(handle).Apply();
handle = 0;
}
void OGLTexture::Allocate(GLenum target, GLsizei levels, GLenum internalformat, GLenum format,
GLenum type, GLsizei width, GLsizei height, GLsizei depth) {
const bool tex_storage = GLAD_GL_ARB_texture_storage || GLES;
switch (target) {
case GL_TEXTURE_1D:
case GL_TEXTURE:
if (tex_storage) {
glTexStorage1D(target, levels, internalformat, width);
} else {
for (GLsizei level{0}; level < levels; ++level) {
glTexImage1D(target, level, internalformat, width, 0, format, type, nullptr);
width >>= 1;
}
}
break;
case GL_TEXTURE_2D:
case GL_TEXTURE_1D_ARRAY:
case GL_TEXTURE_RECTANGLE:
case GL_TEXTURE_CUBE_MAP:
if (tex_storage) {
glTexStorage2D(target, levels, internalformat, width, height);
} else {
for (GLsizei level{0}; level < levels; ++level) {
glTexImage2D(target, level, internalformat, width, height, 0, format, type,
nullptr);
width >>= 1;
if (target != GL_TEXTURE_1D_ARRAY)
height >>= 1;
}
}
break;
case GL_TEXTURE_3D:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_CUBE_MAP_ARRAY:
if (tex_storage) {
glTexStorage3D(target, levels, internalformat, width, height, depth);
} else {
for (GLsizei level{0}; level < levels; ++level) {
glTexImage3D(target, level, internalformat, width, height, depth, 0, format, type,
nullptr);
}
width >>= 1;
height >>= 1;
if (target == GL_TEXTURE_3D)
depth >>= 1;
}
break;
}
if (!tex_storage) {
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels - 1);
}
}
void OGLSampler::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenSamplers(1, &handle);
}
void OGLSampler::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0;
}
void OGLShader::Create(const char* source, GLenum type) {
if (handle != 0)
return;
if (source == nullptr)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = LoadShader(source, type);
}
void OGLShader::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteShader(handle);
handle = 0;
}
void OGLProgram::Create(bool separable_program, const std::vector<GLuint>& shaders) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = LoadProgram(separable_program, shaders);
}
void OGLProgram::Create(const char* vert_shader, const char* frag_shader) {
OGLShader vert, frag;
vert.Create(vert_shader, GL_VERTEX_SHADER);
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(false, {vert.handle, frag.handle});
}
void OGLProgram::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
OpenGLState::GetCurState().ResetProgram(handle).Apply();
handle = 0;
}
void OGLPipeline::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenProgramPipelines(1, &handle);
}
void OGLPipeline::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0;
}
void OGLBuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenBuffers(1, &handle);
}
void OGLBuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteBuffers(1, &handle);
OpenGLState::GetCurState().ResetBuffer(handle).Apply();
handle = 0;
}
void OGLVertexArray::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenVertexArrays(1, &handle);
}
void OGLVertexArray::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteVertexArrays(1, &handle);
OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
handle = 0;
}
void OGLFramebuffer::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenFramebuffers(1, &handle);
}
void OGLFramebuffer::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0;
}
} // namespace OpenGL

View File

@ -1,243 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <utility>
#include <vector>
#include <vulkan/vulkan.hpp>
#include <glm/glm.hpp>
#include "common/common_types.h"
namespace Vulkan {
class VKContext;
struct VertexInfo
{
VertexInfo() = default;
VertexInfo(glm::vec3 position, glm::vec3 color, glm::vec2 coords) :
position(position), color(color), texcoords(coords) {};
glm::vec3 position;
glm::vec3 color;
glm::vec2 texcoords;
};
struct Vertex : public VertexInfo
{
Vertex() = default;
Vertex(glm::vec3 position, glm::vec3 color = {}, glm::vec2 coords = {}) : VertexInfo(position, color, coords) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo));
static constexpr std::array<vk::VertexInputAttributeDescription, 3> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, color)),
vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, texcoords)),
};
};
class VKBuffer : public NonCopyable, public Resource
{
friend class VertexBuffer;
public:
VKBuffer(std::shared_ptr<VKContext> context);
~VKBuffer();
void Create(uint32_t size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage);
void Bind(vk::CommandBuffer& command_buffer);
static uint32_t FindMemoryType(uint32_t type_filter, vk::MemoryPropertyFlags properties, std::shared_ptr<VKContext> context);
static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region);
public:
void* memory = nullptr;
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory buffer_memory;
vk::UniqueBufferView buffer_view;
uint32_t size = 0;
protected:
std::shared_ptr<VKContext> context;
};
class VKTexture : public NonCopyable, public Resource
{
friend class VkContext;
public:
VKTexture(const std::shared_ptr<VKContext>& context);
~VKTexture() = default;
void Create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint);
void CopyPixels(uint8_t* pixels, uint32_t count);
private:
void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
private:
// Texture buffer
void* pixels = nullptr;
std::shared_ptr<VKContext> context;
uint32_t width = 0, height = 0, channels = 0;
VKBuffer staging;
// Texture objects
vk::UniqueImage texture;
vk::UniqueImageView texture_view;
vk::UniqueDeviceMemory texture_memory;
vk::UniqueSampler texture_sampler;
vk::Format format;
};
class OGLShader : private NonCopyable {
public:
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLShader() {
Release();
}
OGLShader& operator=(OGLShader&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
void Create(const char* source, GLenum type);
void Release();
GLuint handle = 0;
};
class OGLProgram : private NonCopyable {
public:
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
Release();
}
OGLProgram& operator=(OGLProgram&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new program from given shader objects
void Create(bool separable_program, const std::vector<GLuint>& shaders);
/// Creates a new program from given shader soruce code
void Create(const char* vert_shader, const char* frag_shader);
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept {
handle = std::exchange<GLuint>(o.handle, 0);
}
~OGLPipeline() {
Release();
}
OGLPipeline& operator=(OGLPipeline&& o) noexcept {
Release();
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLBuffer : private NonCopyable {
public:
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLBuffer() {
Release();
}
OGLBuffer& operator=(OGLBuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLVertexArray : private NonCopyable {
public:
OGLVertexArray() = default;
OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLVertexArray() {
Release();
}
OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLFramebuffer() {
Release();
}
OGLFramebuffer& operator=(OGLFramebuffer&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
} // namespace OpenGL

View File

@ -0,0 +1,235 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <optional>
#include <string>
#include <type_traits>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace Vulkan {
enum class ProgramType : u32 { VS, GS, FS };
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
struct PicaFSConfigState {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
* This struct contains all state used to generate the fragment shader that emulates the
* current Pica register configuration. It is used as a cache key for generated shader
* programs. The shader generator should retrieve state from this struct only, not by
* directly accessing Pica registers. This reduces the risk of bugs where Pica state is
* not captured in the shader cache key, which would result in (what should be) two
* separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
};
/**
* This struct contains common information used to identify a host vertex/geometry shader
* generated from a PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
};
/**
* This struct contains information used to identify a host vertex shader generated from a
* PICA vertex shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;
}
};
struct PicaGSConfigCommonRaw {
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
* This struct contains information used to identify a host geometry shader generated from
* the PICA no-geometry-shader pipeline.
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
state.Init(regs);
}
};
/**
* This struct combines the vertex and fragment states for a complete pipeline cache key
*/
struct VKPipelineCacheKey {
VKPipelineCacheKey(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) :
vertex_config(regs.vs, setup), fragment_config(PicaFSConfig::BuildFromRegs(regs)) {}
PicaVSConfig vertex_config;
PicaFSConfig fragment_config;
};
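// Sketch (assumption): a pipeline manager could key its pipeline map on this
// struct by combining the two sub-hashes; Pica::g_state and the XOR combiner
// are illustrative, not part of this commit:
//
//   VKPipelineCacheKey key{Pica::g_state.regs, Pica::g_state.vs};
//   const u64 pipeline_hash = key.vertex_config.Hash() ^ key.fragment_config.Hash();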
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaVSConfig> {
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaFixedGSConfig> {
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@ -41,6 +41,15 @@ constexpr uint ShadowTextureNZ = 6;
class VulkanState {
public:
struct Messenger {
bool cull_state;
bool depth_state;
bool color_mask;
bool stencil_state;
bool logic_op;
bool texture_state;
};
struct {
bool enabled;
vk::CullModeFlags mode;
@ -130,18 +139,8 @@ public:
return cur_state;
}
/// Apply this state as the current OpenGL state
void Apply() const;
/// Resets any references to the given resource
VulkanState& ResetTexture(GLuint handle);
VulkanState& ResetSampler(GLuint handle);
VulkanState& ResetProgram(GLuint handle);
VulkanState& ResetPipeline(GLuint handle);
VulkanState& ResetBuffer(GLuint handle);
VulkanState& ResetVertexArray(GLuint handle);
VulkanState& ResetFramebuffer(GLuint handle);
VulkanState& ResetRenderbuffer(GLuint handle);
/// Apply all dynamic state to the provided Vulkan command buffer
void Apply(vk::CommandBuffer& command_buffer) const;
private:
static VulkanState cur_state;

View File

@ -0,0 +1,171 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
namespace Vulkan {
SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
SurfaceParams params = *this;
const u32 tiled_size = is_tiled ? 8 : 1;
const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
PAddr aligned_start =
addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
PAddr aligned_end =
addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
if (aligned_end - aligned_start > stride_tiled_bytes) {
params.addr = aligned_start;
params.height = (aligned_end - aligned_start) / BytesInPixels(stride);
} else {
// 1 row
ASSERT(aligned_end - aligned_start == stride_tiled_bytes);
const u32 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1);
aligned_start =
addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment);
aligned_end =
addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment);
params.addr = aligned_start;
params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size;
params.stride = params.width;
params.height = tiled_size;
}
params.UpdateParams();
return params;
}
SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle<u32> unscaled_rect) const {
if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) {
return {};
}
if (is_tiled) {
unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8;
unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8;
unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8;
unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8;
}
const u32 stride_tiled = !is_tiled ? stride : stride * 8;
const u32 pixel_offset =
stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) +
unscaled_rect.left;
const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth();
return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)};
}
SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
SurfaceInterval result{};
const auto valid_regions =
SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
for (auto& valid_interval : valid_regions) {
const SurfaceInterval aligned_interval{
addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
BytesInPixels(is_tiled ? 8 * 8 : 1)),
addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
BytesInPixels(is_tiled ? 8 * 8 : 1))};
if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
boost::icl::length(aligned_interval) == 0) {
continue;
}
// Get the rectangle within aligned_interval
const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1);
SurfaceInterval rect_interval{
addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
};
if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
// 1 row
rect_interval = aligned_interval;
} else if (boost::icl::length(rect_interval) == 0) {
// 2 rows that do not make a rectangle, return the larger one
const SurfaceInterval row1{boost::icl::first(aligned_interval),
boost::icl::first(rect_interval)};
const SurfaceInterval row2{boost::icl::first(rect_interval),
boost::icl::last_next(aligned_interval)};
rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
}
if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
result = rect_interval;
}
}
return result;
}
Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const {
const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr);
if (is_tiled) {
const int x0 = (begin_pixel_index % (stride * 8)) / 8;
const int y0 = (begin_pixel_index / (stride * 8)) * 8;
// Top to bottom
return Common::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
height - (y0 + sub_surface.height));
}
const int x0 = begin_pixel_index % stride;
const int y0 = begin_pixel_index / stride;
// Bottom to top
return Common::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
}
Common::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const {
auto rect = GetSubRect(sub_surface);
rect.left = rect.left * res_scale;
rect.right = rect.right * res_scale;
rect.top = rect.top * res_scale;
rect.bottom = rect.bottom * res_scale;
return rect;
}
bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
return std::tie(other_surface.addr, other_surface.width, other_surface.height,
other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
pixel_format != PixelFormat::Invalid;
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
sub_surface.is_tiled == is_tiled &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).right <= stride;
}
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
(std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
0;
}
bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||
end < texcopy_params.end) {
return false;
}
if (texcopy_params.width != texcopy_params.stride) {
const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1));
return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
}
return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
}
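// Worked example: for a linear RGBA8 surface with stride == width == 256, a
// sub-rect whose bottom row is 16 gives pixel_offset = 256 * 16 = 4096 in
// GetSubRectInterval, i.e. the interval starts BytesInPixels(4096) = 16384
// bytes past addr.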
} // namespace Vulkan

View File

@ -0,0 +1,270 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <climits>
#include <boost/icl/interval.hpp>
#include "common/assert.h"
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_texturing.h"
namespace Vulkan {
struct CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
struct SurfaceParams {
private:
static constexpr std::array<unsigned int, 18> BPP_TABLE = {
32, // RGBA8
24, // RGB8
16, // RGB5A1
16, // RGB565
16, // RGBA4
16, // IA8
16, // RG8
8, // I8
8, // A8
8, // IA4
4, // I4
4, // A4
4, // ETC1
8, // ETC1A4
16, // D16
0,
24, // D24
32, // D24S8
};
public:
enum class PixelFormat {
// First 5 formats are shared between textures and color buffers
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
// Texture-only formats
IA8 = 5,
RG8 = 6,
I8 = 7,
A8 = 8,
IA4 = 9,
I4 = 10,
A4 = 11,
ETC1 = 12,
ETC1A4 = 13,
// Depth buffer-only formats
D16 = 14,
// gap
D24 = 16,
D24S8 = 17,
Invalid = 255,
};
enum class SurfaceType {
Color = 0,
Texture = 1,
Depth = 2,
DepthStencil = 3,
Fill = 4,
Invalid = 5
};
static constexpr unsigned int GetFormatBpp(PixelFormat format) {
const auto format_idx = static_cast<std::size_t>(format);
DEBUG_ASSERT_MSG(format_idx < BPP_TABLE.size(), "Invalid pixel format {}", format_idx);
return BPP_TABLE[format_idx];
}
unsigned int GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
static std::string_view PixelFormatAsString(PixelFormat format) {
switch (format) {
case PixelFormat::RGBA8:
return "RGBA8";
case PixelFormat::RGB8:
return "RGB8";
case PixelFormat::RGB5A1:
return "RGB5A1";
case PixelFormat::RGB565:
return "RGB565";
case PixelFormat::RGBA4:
return "RGBA4";
case PixelFormat::IA8:
return "IA8";
case PixelFormat::RG8:
return "RG8";
case PixelFormat::I8:
return "I8";
case PixelFormat::A8:
return "A8";
case PixelFormat::IA4:
return "IA4";
case PixelFormat::I4:
return "I4";
case PixelFormat::A4:
return "A4";
case PixelFormat::ETC1:
return "ETC1";
case PixelFormat::ETC1A4:
return "ETC1A4";
case PixelFormat::D16:
return "D16";
case PixelFormat::D24:
return "D24";
case PixelFormat::D24S8:
return "D24S8";
default:
return "Not a real pixel format";
}
}
static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
: PixelFormat::Invalid;
}
static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
switch (format) {
// RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
case GPU::Regs::PixelFormat::RGB565:
return PixelFormat::RGB565;
case GPU::Regs::PixelFormat::RGB5A1:
return PixelFormat::RGB5A1;
default:
return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
}
}
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b);
if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
(b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
return true;
}
if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
return true;
}
if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
return true;
}
return false;
}
static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
if ((unsigned int)pixel_format < 5) {
return SurfaceType::Color;
}
if ((unsigned int)pixel_format < 14) {
return SurfaceType::Texture;
}
if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
return SurfaceType::Depth;
}
if (pixel_format == PixelFormat::D24S8) {
return SurfaceType::DepthStencil;
}
return SurfaceType::Invalid;
}
/// Update the params "size", "end" and "type" from the already set "addr", "width", "height"
/// and "pixel_format"
void UpdateParams() {
if (stride == 0) {
stride = width;
}
type = GetFormatType(pixel_format);
size = !is_tiled ? BytesInPixels(stride * (height - 1) + width)
: BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8);
end = addr + size;
}
SurfaceInterval GetInterval() const {
return SurfaceInterval(addr, end);
}
// Returns the outer rectangle containing "interval"
SurfaceParams FromInterval(SurfaceInterval interval) const;
SurfaceInterval GetSubRectInterval(Common::Rectangle<u32> unscaled_rect) const;
// Returns the region of the biggest valid rectangle within interval
SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
u32 GetScaledWidth() const {
return width * res_scale;
}
u32 GetScaledHeight() const {
return height * res_scale;
}
Common::Rectangle<u32> GetRect() const {
return {0, height, width, 0};
}
Common::Rectangle<u32> GetScaledRect() const {
return {0, GetScaledHeight(), GetScaledWidth(), 0};
}
u32 PixelsInBytes(u32 size) const {
return size * CHAR_BIT / GetFormatBpp(pixel_format);
}
u32 BytesInPixels(u32 pixels) const {
return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
}
bool ExactMatch(const SurfaceParams& other_surface) const;
bool CanSubRect(const SurfaceParams& sub_surface) const;
bool CanExpand(const SurfaceParams& expanded_surface) const;
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
Common::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
Common::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
PAddr addr = 0;
PAddr end = 0;
u32 size = 0;
u32 width = 0;
u32 height = 0;
u32 stride = 0;
u16 res_scale = 1;
bool is_tiled = false;
PixelFormat pixel_format = PixelFormat::Invalid;
SurfaceType type = SurfaceType::Invalid;
};
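// Worked example: a 256x256 linear RGBA8 surface has GetFormatBpp() == 32, so
// BytesInPixels(256) == 1024 bytes per row, and UpdateParams() computes
// size = BytesInPixels(255 * 256 + 256) == 262144 bytes (0x40000).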
} // namespace Vulkan

View File

@ -72,7 +72,6 @@ private:
public:
// Window attributes
GLFWwindow* window;
uint32_t width = 0, height = 0;
bool framebuffer_resized = false;
std::string_view name;

View File

@ -1,16 +1,20 @@
#include "vk_texture.h"
#include "vk_buffer.h"
#include "vk_context.h"
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
VkTexture::VkTexture(const std::shared_ptr<VkContext>& context) :
context(context), staging(context)
{
}
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/renderer_vulkan/vk_instance.h"
void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format format_)
namespace Vulkan {
void VKTexture::Create(const Info& info)
{
auto& device = context->device;
format = format_; width = width_; height = height_;
auto& device = g_vk_instace->GetDevice();
format = info.format;
width = info.width;
height = info.height;
switch (format)
{
@ -23,57 +27,73 @@ void VkTexture::create(int width_, int height_, vk::ImageType type, vk::Format f
channels = 3;
break;
default:
throw std::runtime_error("[VK] Unknown texture format");
LOG_CRITICAL(Render_Vulkan, "Unknown texture format {}", static_cast<u32>(format));
}
int image_size = width * height * channels;
staging.create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
// Create staging memory buffer for pixel transfers
u32 image_size = width * height * channels;
staging.Create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
vk::BufferUsageFlagBits::eTransferSrc);
pixels = staging.memory;
// Create the texture
vk::ImageCreateFlags flags = info.view_type == vk::ImageViewType::eCube ? vk::ImageCreateFlagBits::eCubeCompatible : {};
vk::ImageCreateInfo image_info
(
{},
type,
flags,
info.type,
format,
{ width, height, 1 }, 1, 1,
{ width, height, 1 }, info.mipmap_levels, info.array_layers,
vk::SampleCountFlagBits::e1,
vk::ImageTiling::eOptimal,
vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled
);
texture = device->createImageUnique(image_info);
texture = device.createImageUnique(image_info);
// Create texture memory
auto requirements = device->getImageMemoryRequirements(texture.get());
auto memory_index = Buffer::find_memory_type(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal, context);
auto requirements = device.getImageMemoryRequirements(texture.get());
auto memory_index = VKBuffer::FindMemoryType(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal);
vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
texture_memory = device->allocateMemoryUnique(alloc_info);
device->bindImageMemory(texture.get(), texture_memory.get(), 0);
texture_memory = device.allocateMemoryUnique(alloc_info);
device.bindImageMemory(texture.get(), texture_memory.get(), 0);
// Create texture view
vk::ImageViewCreateInfo view_info({}, texture.get(), vk::ImageViewType::e1D, format, {},
vk::ImageViewCreateInfo view_info({}, texture.get(), info.view_type, format, {},
vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1));
texture_view = device->createImageViewUnique(view_info);
texture_view = device.createImageViewUnique(view_info);
// Create texture sampler
auto props = context->physical_device.getProperties();
vk::SamplerCreateInfo sampler_info({}, vk::Filter::eNearest, vk::Filter::eNearest, vk::SamplerMipmapMode::eNearest, vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToEdge, vk::SamplerAddressMode::eClampToEdge, {}, true, props.limits.maxSamplerAnisotropy,
false, vk::CompareOp::eAlways, {}, {}, vk::BorderColor::eIntOpaqueBlack, false);
auto properties = g_vk_instace->GetPhysicalDevice().getProperties();
vk::SamplerCreateInfo sampler_info
(
{},
info.sampler_info.mag_filter,
info.sampler_info.min_filter,
info.sampler_info.mipmap_mode,
info.sampler_info.wrapping[0], info.sampler_info.wrapping[1], info.sampler_info.wrapping[2],
{},
true,
properties.limits.maxSamplerAnisotropy,
false,
vk::CompareOp::eAlways,
{},
{},
vk::BorderColor::eIntOpaqueBlack,
false
);
texture_sampler = device->createSamplerUnique(sampler_info);
texture_sampler = device.createSamplerUnique(sampler_info);
}
void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
void VKTexture::TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout)
{
auto& device = context->device;
auto& queue = context->graphics_queue;
auto& device = g_vk_instace->GetDevice();
auto& queue = g_vk_instace->graphics_queue;
vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
@ -82,25 +102,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
std::array<vk::ImageMemoryBarrier, 1> barriers = { barrier };
vk::PipelineStageFlags source_stage, destination_stage;
if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal)
{
if (old_layout == vk::ImageLayout::eUndefined && new_layout == vk::ImageLayout::eTransferDstOptimal) {
barrier.srcAccessMask = vk::AccessFlagBits::eNone;
barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite;
source_stage = vk::PipelineStageFlagBits::eTopOfPipe;
destination_stage = vk::PipelineStageFlagBits::eTransfer;
}
else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal)
{
else if (old_layout == vk::ImageLayout::eTransferDstOptimal && new_layout == vk::ImageLayout::eShaderReadOnlyOptimal) {
barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
source_stage = vk::PipelineStageFlagBits::eTransfer;
destination_stage = vk::PipelineStageFlagBits::eFragmentShader;
}
else
{
throw std::invalid_argument("[VK] Unsupported layout transition!");
else {
LOG_CRITICAL(Render_Vulkan, "Unsupported layout transition");
UNREACHABLE();
}
command_buffer.pipelineBarrier(source_stage, destination_stage, vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
@ -110,23 +128,23 @@ void VkTexture::transition_layout(vk::ImageLayout old_layout, vk::ImageLayout ne
queue.submit(submit_info, nullptr);
queue.waitIdle();
device->freeCommandBuffers(context->command_pool.get(), command_buffer);
device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
}
void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
void VKTexture::CopyPixels(std::span<u32> new_pixels)
{
auto& device = context->device;
auto& queue = context->graphics_queue;
auto& device = g_vk_instace->GetDevice();
auto& queue = g_vk_instace->graphics_queue;
// Transition image to transfer format
transition_layout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);
TransitionLayout(vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal);
// Copy pixels to staging buffer
std::memcpy(pixels, new_pixels, count * channels);
std::memcpy(pixels, new_pixels.data(), new_pixels.size() * channels);
// Copy the staging buffer to the image
vk::CommandBufferAllocateInfo alloc_info(context->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
vk::CommandBuffer command_buffer = device->allocateCommandBuffers(alloc_info)[0];
vk::CommandBufferAllocateInfo alloc_info(g_vk_instace->command_pool.get(), vk::CommandBufferLevel::ePrimary, 1);
vk::CommandBuffer command_buffer = device.allocateCommandBuffers(alloc_info)[0];
command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
@ -140,8 +158,10 @@ void VkTexture::copy_pixels(uint8_t* new_pixels, uint32_t count)
queue.submit(submit_info, nullptr);
queue.waitIdle();
device->freeCommandBuffers(context->command_pool.get(), command_buffer);
device.freeCommandBuffers(g_vk_instace->command_pool.get(), command_buffer);
// Prepare for shader reads
transition_layout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
TransitionLayout(vk::ImageLayout::eTransferDstOptimal, vk::ImageLayout::eShaderReadOnlyOptimal);
}
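// Usage sketch (sizes, format and pixel_data illustrative): create a 2D RGBA
// texture and upload a frame of pixel data.
//
//   VKTexture::Info info;
//   info.width = 64;
//   info.height = 64;
//   info.format = vk::Format::eR8G8B8A8Unorm;
//   info.type = vk::ImageType::e2D;
//   info.view_type = vk::ImageViewType::e2D;
//
//   VKTexture texture;
//   texture.Create(info);
//   texture.CopyPixels(std::span<u32>(pixel_data, 64 * 64));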
} // namespace Vulkan

View File

@ -1,29 +1,55 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "vk_buffer.h"
#include <memory>
#include <span>
#include "video_core/renderer_vulkan/vk_buffer.h"
class VkContext;
namespace Vulkan {
class VkTexture : public NonCopyable, public Resource
{
friend class VkContext;
struct SamplerInfo {
std::array<vk::SamplerAddressMode, 3> wrapping = { vk::SamplerAddressMode::eClampToEdge };
vk::Filter min_filter = vk::Filter::eLinear;
vk::Filter mag_filter = vk::Filter::eLinear;
vk::SamplerMipmapMode mipmap_mode = vk::SamplerMipmapMode::eLinear;
// Required so SamplerInfo can be used as an unordered_map key
bool operator==(const SamplerInfo& rhs) const noexcept {
return wrapping == rhs.wrapping && min_filter == rhs.min_filter &&
mag_filter == rhs.mag_filter && mipmap_mode == rhs.mipmap_mode;
}
};
/// Vulkan texture object
class VKTexture final : public NonCopyable {
public:
VkTexture(const std::shared_ptr<VkContext>& context);
~VkTexture() = default;
/// Information for the creation of the target texture
struct Info {
u32 width, height;
vk::Format format;
vk::ImageType type;
vk::ImageViewType view_type;
u32 mipmap_levels = 1;
u32 array_layers = 1;
SamplerInfo sampler_info = {};
};
void create(int width, int height, vk::ImageType type, vk::Format format = vk::Format::eR8G8B8A8Uint);
void copy_pixels(std::span<u8> pixels);
VKTexture() = default;
VKTexture(VKTexture&&) = default;
~VKTexture() = default;
/// Create a new Vulkan texture object along with its sampler
void Create(const Info& info);
/// Copies CPU side pixel data to the GPU texture buffer
void CopyPixels(std::span<u32> pixels);
private:
void transition_layout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
/// Used to transition the image to an optimal layout during transfers
void TransitionLayout(vk::ImageLayout old_layout, vk::ImageLayout new_layout);
private:
// Texture buffer
void* pixels = nullptr;
std::shared_ptr<VkContext> context;
uint32_t width = 0, height = 0, channels = 0;
Buffer staging;
VKBuffer staging;
// Texture objects
vk::UniqueImage texture;
@ -32,3 +58,24 @@ private:
vk::UniqueSampler texture_sampler;
vk::Format format;
};
/// Vulkan framebuffer object similar to an FBO in OpenGL
class VKFramebuffer final : public NonCopyable {
public:
VKFramebuffer() = default;
~VKFramebuffer() = default;
// Create Vulkan framebuffer object
void Create(u32 width, u32 height, u32 layers, u32 samples);
vk::Rect2D GetRect() const { return vk::Rect2D{{0, 0}, {width, height}}; }
private:
u32 width, height;
vk::UniqueFramebuffer framebuffer;
vk::RenderPass load_renderpass;
vk::RenderPass discard_renderpass;
vk::RenderPass clear_renderpass;
};
} // namespace Vulkan