From 72340c56850290863337062165d6912d80e918c1 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 13 May 2022 19:16:53 +0300 Subject: [PATCH] renderer_vulkan: Port rasterizer to Vulkan * This is a massive code dump so I'm not going to even attempt to explain what changed --- src/video_core/rasterizer_interface.h | 11 + .../renderer_vulkan/renderer_vulkan.h | 8 +- src/video_core/renderer_vulkan/vk_buffer.cpp | 62 +- src/video_core/renderer_vulkan/vk_buffer.h | 18 +- .../renderer_vulkan/vk_instance.cpp | 3 + src/video_core/renderer_vulkan/vk_instance.h | 2 + .../renderer_vulkan/vk_rasterizer.cpp | 1323 +++++------------ .../renderer_vulkan/vk_rasterizer.h | 195 ++- .../renderer_vulkan/vk_rasterizer_cache.cpp | 315 +--- .../renderer_vulkan/vk_rasterizer_cache.h | 7 - .../renderer_vulkan/vk_resource_cache.cpp | 375 +++++ .../renderer_vulkan/vk_resource_cache.h | 50 + src/video_core/renderer_vulkan/vk_state.cpp | 298 +++- src/video_core/renderer_vulkan/vk_state.h | 182 ++- .../renderer_vulkan/vk_swapchain.cpp | 21 +- src/video_core/renderer_vulkan/vk_swapchain.h | 9 +- .../renderer_vulkan/vk_task_scheduler.cpp | 42 +- .../renderer_vulkan/vk_task_scheduler.h | 21 +- src/video_core/renderer_vulkan/vk_texture.cpp | 199 +-- src/video_core/renderer_vulkan/vk_texture.h | 61 +- 20 files changed, 1534 insertions(+), 1668 deletions(-) diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 873e4273e..e8367dcbd 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -13,6 +13,10 @@ namespace OpenGL { struct ScreenInfo; } +namespace Vulkan { +struct ScreenInfo; +} + namespace Pica::Shader { struct OutputVertex; } // namespace Pica::Shader @@ -80,6 +84,13 @@ public: return false; } + /// Attempt to use a faster method to display the framebuffer to screen + virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + Vulkan::ScreenInfo& 
screen_info) { + return false; + } + /// Attempt to draw using hardware shaders virtual bool AccelerateDrawBatch(bool is_indexed) { return false; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index c9b307be8..ec2ad37fc 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -21,8 +21,7 @@ namespace Frontend { struct Frame { u32 width = 0, height = 0; bool color_reloaded = false; - Vulkan::VKTexture color; - Vulkan::VKFramebuffer render, present; + Vulkan::VKTexture* color; vk::UniqueFence render_fence, present_fence; }; } // namespace Frontend @@ -31,9 +30,9 @@ namespace Vulkan { /// Structure used for storing information about the display target for each 3DS screen struct ScreenInfo { - u32 display_texture; + Vulkan::VKTexture* display_texture; Common::Rectangle display_texcoords; - VKTexture texture; + Vulkan::VKTexture* texture; GPU::Regs::PixelFormat format; }; @@ -86,7 +85,6 @@ private: // OpenGL object IDs VKBuffer vertex_buffer; OGLProgram shader; - VKFramebuffer screenshot_framebuffer; OGLSampler filter_sampler; /// Display information for top and bottom screens respectively diff --git a/src/video_core/renderer_vulkan/vk_buffer.cpp b/src/video_core/renderer_vulkan/vk_buffer.cpp index c7dff59e6..3c83c15ec 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer.cpp @@ -12,54 +12,45 @@ namespace Vulkan { VKBuffer::~VKBuffer() { - if (memory != nullptr) { - g_vk_instace->GetDevice().unmapMemory(buffer_memory); - } - - auto deleter = [this]() { - if (buffer) { - auto& device = g_vk_instace->GetDevice(); - device.destroyBuffer(buffer); - device.freeMemory(buffer_memory); - device.destroyBufferView(buffer_view); + if (buffer) { + if (memory != nullptr) { + g_vk_instace->GetDevice().unmapMemory(buffer_memory); } - }; - g_vk_task_scheduler->Schedule(deleter); + auto deleter = [this]() { + 
auto& device = g_vk_instace->GetDevice(); + device.destroyBuffer(buffer); + device.freeMemory(buffer_memory); + }; + + g_vk_task_scheduler->Schedule(deleter); + } } -void VKBuffer::Create(u32 byte_count, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, - vk::Format view_format) { +void VKBuffer::Create(const VKBuffer::Info& info) { auto& device = g_vk_instace->GetDevice(); - size = byte_count; + buffer_info = info; - vk::BufferCreateInfo bufferInfo({}, byte_count, usage); + vk::BufferCreateInfo bufferInfo({}, info.size, info.usage); buffer = device.createBuffer(bufferInfo); auto mem_requirements = device.getBufferMemoryRequirements(buffer); - auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, properties); + auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, info.properties); vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index); buffer_memory = device.allocateMemory(alloc_info); device.bindBufferMemory(buffer, buffer_memory, 0); // Optionally map the buffer to CPU memory - if (properties & vk::MemoryPropertyFlagBits::eHostVisible) { - memory = device.mapMemory(buffer_memory, 0, byte_count); - } - - // Create buffer view for texel buffers - if (usage & vk::BufferUsageFlagBits::eStorageTexelBuffer || - usage & vk::BufferUsageFlagBits::eUniformTexelBuffer) { - vk::BufferViewCreateInfo view_info({}, buffer, view_format, 0, byte_count); - buffer_view = device.createBufferView(view_info); + if (info.properties & vk::MemoryPropertyFlagBits::eHostVisible) { + memory = device.mapMemory(buffer_memory, 0, info.size); } } -void VKBuffer::CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region) { +void VKBuffer::CopyBuffer(VKBuffer* src_buffer, VKBuffer* dst_buffer, vk::BufferCopy region) { auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - command_buffer.copyBuffer(src_buffer.buffer, dst_buffer.buffer, region); + 
command_buffer.copyBuffer(src_buffer->buffer, dst_buffer->buffer, region); } u32 VKBuffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) { @@ -76,19 +67,4 @@ u32 VKBuffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties UNREACHABLE(); } -void StagingBuffer::Create(u32 size) { - buffer.Create(size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, - vk::BufferUsageFlagBits::eTransferSrc); -} - -u8* StagingBuffer::Request(u32 bytes) { - // Check if there is enough space left - if (buffer.GetSize() - end_offset >= bytes) { - u8* ptr = buffer.GetHostPointer() + end_offset; - end_offset += bytes; - - // Schedule the memory to be freed - } -} - } diff --git a/src/video_core/renderer_vulkan/vk_buffer.h b/src/video_core/renderer_vulkan/vk_buffer.h index d166c6c01..45d85694d 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.h +++ b/src/video_core/renderer_vulkan/vk_buffer.h @@ -15,29 +15,33 @@ namespace Vulkan { /// Generic Vulkan buffer object used by almost every resource class VKBuffer final : public NonCopyable { public: + struct Info { + u32 size; + vk::MemoryPropertyFlags properties; + vk::BufferUsageFlags usage; + }; + VKBuffer() = default; VKBuffer(VKBuffer&&) = default; ~VKBuffer(); - /// Create a generic Vulkan buffer object - void Create(u32 size, vk::MemoryPropertyFlags properties, vk::BufferUsageFlags usage, - vk::Format view_format = vk::Format::eUndefined); + /// Create a new Vulkan buffer object + void Create(const Info& info); /// Global utility functions used by other objects static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties); - static void CopyBuffer(VKBuffer& src_buffer, VKBuffer& dst_buffer, const vk::BufferCopy& region); + static void CopyBuffer(VKBuffer* src_buffer, VKBuffer* dst_buffer, vk::BufferCopy region); /// Return a pointer to the mapped memory if the buffer is host mapped u8* GetHostPointer() { return reinterpret_cast(memory); } 
vk::Buffer& GetBuffer() { return buffer; } - u32 GetSize() const { return size; } + u32 GetSize() const { return buffer_info.size; } private: + Info buffer_info; void* memory = nullptr; vk::Buffer buffer; vk::DeviceMemory buffer_memory; - vk::BufferView buffer_view; - uint32_t size = 0; }; } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 80ed57056..39b94c546 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -18,6 +18,9 @@ bool VKInstance::Create(vk::Instance instance, vk::PhysicalDevice physical_devic this->instance = instance; this->physical_device = physical_device; + // Get physical device limits + device_limits = physical_device.getProperties().limits; + // Determine required extensions and features if (!FindExtensions() || !FindFeatures()) return false; diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 765085fe1..3576c0999 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -34,6 +34,7 @@ public: /// Feature support bool SupportsAnisotropicFiltering() const; + u32 UniformMinAlignment() const { return device_limits.minUniformBufferOffsetAlignment; } private: bool CreateDevice(vk::SurfaceKHR surface, bool validation_enabled); @@ -54,6 +55,7 @@ public: std::vector device_extensions; vk::PhysicalDeviceFeatures device_features{}; vk::PhysicalDeviceVulkan12Features new_features{}; + vk::PhysicalDeviceLimits device_limits; }; extern std::unique_ptr g_vk_instace; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2f04412a4..30b3b6a7b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2022 Citra Emulator Project // 
Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -20,14 +20,16 @@ #include "video_core/regs_framebuffer.h" #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/renderer_opengl/pica_to_gl.h" -#include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/renderer_vulkan/vk_surface_params.h" +#include "video_core/renderer_vulkan/pica_to_vulkan.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/video_core.h" -namespace OpenGL { +namespace Vulkan { using PixelFormat = SurfaceParams::PixelFormat; using SurfaceType = SurfaceParams::SurfaceType; @@ -39,165 +41,81 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -static bool IsVendorAmd() { - const std::string_view gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - return gpu_vendor == "ATI Technologies Inc." 
|| gpu_vendor == "Advanced Micro Devices, Inc."; -} -static bool IsVendorIntel() { - std::string gpu_vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - return gpu_vendor == "Intel Inc."; -} - -RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window) - : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), - uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), - index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), - texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), - texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { - - allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && - GLAD_GL_ARB_framebuffer_no_attachments); - if (!allow_shadow) { - LOG_WARNING(Render_OpenGL, - "Shadow might not be able to render because of unsupported OpenGL extensions."); - } - - if (!GLAD_GL_ARB_copy_image && !GLES) { - LOG_WARNING(Render_OpenGL, - "ARB_copy_image not supported. 
Some games might produce artifacts."); - } +RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) { + // Implement shadow + allow_shadow = false; // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 - state.clip_distance[0] = true; - - // Create a 1x1 clear texture to use in the NULL case, - // instead of OpenGL's default of solid black - glGenTextures(1, &default_texture); - glBindTexture(GL_TEXTURE_2D, default_texture); - // For some reason alpha 0 wraps around to 1.0, so use 1/255 instead - u8 framebuffer_data[4] = {0, 0, 0, 1}; - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); - - // Create sampler objects - for (std::size_t i = 0; i < texture_samplers.size(); ++i) { - texture_samplers[i].Create(); - state.texture_units[i].sampler = texture_samplers[i].sampler.handle; - } - - // Create cubemap texture and sampler objects - texture_cube_sampler.Create(); - state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle; - - // Generate VAO - sw_vao.Create(); - hw_vao.Create(); + //state.clip_distance[0] = true; + // Setup uniform data uniform_block_data.dirty = true; - uniform_block_data.lighting_lut_dirty.fill(true); uniform_block_data.lighting_lut_dirty_any = true; - uniform_block_data.fog_lut_dirty = true; - uniform_block_data.proctex_noise_lut_dirty = true; uniform_block_data.proctex_color_map_dirty = true; uniform_block_data.proctex_alpha_map_dirty = true; uniform_block_data.proctex_lut_dirty = true; uniform_block_data.proctex_diff_lut_dirty = true; - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); - uniform_size_aligned_vs = - Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); - uniform_size_aligned_fs = - Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); + // Query uniform buffer alignment requirements + uniform_buffer_alignment = g_vk_instace->UniformMinAlignment(); + uniform_size_aligned_vs = 
Common::AlignUp(sizeof(VSUniformData), + uniform_buffer_alignment); + uniform_size_aligned_fs = Common::AlignUp(sizeof(UniformData), + uniform_buffer_alignment); // Set vertex attributes for software shader path - state.draw.vertex_array = sw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); - state.Apply(); + state.SetVertexBuffer(&vertex_buffer, 0); - glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, position)); - glEnableVertexAttribArray(ATTRIBUTE_POSITION); + // Allocate texture buffer LUTs + VKBuffer::Info texel_buffer_info = { + .size = TEXTURE_BUFFER_SIZE, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + .usage = vk::BufferUsageFlagBits::eStorageTexelBuffer, + }; - glVertexAttribPointer(ATTRIBUTE_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, color)); - glEnableVertexAttribArray(ATTRIBUTE_COLOR); + texture_buffer_lut_lf.Create(texel_buffer_info); + texture_buffer_lut.Create(texel_buffer_info); - glVertexAttribPointer(ATTRIBUTE_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, tex_coord0)); - glVertexAttribPointer(ATTRIBUTE_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, tex_coord1)); - glVertexAttribPointer(ATTRIBUTE_TEXCOORD2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, tex_coord2)); - glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0); - glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD1); - glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD2); + // Create and bind uniform buffers + VKBuffer::Info uniform_info = { + .size = UNIFORM_BUFFER_SIZE, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + .usage = vk::BufferUsageFlagBits::eUniformBuffer + }; - glVertexAttribPointer(ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); 
- glEnableVertexAttribArray(ATTRIBUTE_TEXCOORD0_W); + uniform_buffer.Create(uniform_info); + state.SetUniformBuffer(BindingID::VertexUniform, &uniform_buffer, 0, uniform_size_aligned_vs); + state.SetUniformBuffer(BindingID::PicaUniform, &uniform_buffer, uniform_size_aligned_vs, uniform_size_aligned_fs); - glVertexAttribPointer(ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, normquat)); - glEnableVertexAttribArray(ATTRIBUTE_NORMQUAT); + // Bind texel buffers + state.SetTexelBuffer(BindingID::LutLF, &texture_buffer_lut_lf, vk::Format::eR32G32Sfloat); + state.SetTexelBuffer(BindingID::LutRG, &texture_buffer_lut, vk::Format::eR32G32Sfloat); + state.SetTexelBuffer(BindingID::LutRGBA, &texture_buffer_lut, vk::Format::eR32G32B32A32Sfloat); - glVertexAttribPointer(ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), - (GLvoid*)offsetof(HardwareVertex, view)); - glEnableVertexAttribArray(ATTRIBUTE_VIEW); + // Create vertex and index buffers + VKBuffer::Info vertex_info = { + .size = VERTEX_BUFFER_SIZE, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + .usage = vk::BufferUsageFlagBits::eVertexBuffer + }; - // Create render framebuffer - framebuffer.Create(); + VKBuffer::Info index_info = { + .size = INDEX_BUFFER_SIZE, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + .usage = vk::BufferUsageFlagBits::eIndexBuffer + }; - // Allocate and bind texture buffer lut textures - texture_buffer_lut_lf.Create(); - texture_buffer_lut_rg.Create(); - texture_buffer_lut_rgba.Create(); - state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle; - state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; - state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; - state.Apply(); - glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle()); - 
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); - glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); - - // Bind index buffer for hardware shader path - state.draw.vertex_array = hw_vao.handle; - state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); - -#ifdef __APPLE__ - if (IsVendorIntel()) { - shader_program_manager = std::make_unique( - emu_window, - VideoCore::g_separable_shader_enabled ? GLAD_GL_ARB_separate_shader_objects : false, - is_amd); - } else { - shader_program_manager = std::make_unique( - emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); - } -#else - shader_program_manager = std::make_unique( - emu_window, GLAD_GL_ARB_separate_shader_objects, is_amd); -#endif - - glEnable(GL_BLEND); + vertex_buffer.Create(vertex_info); + index_buffer.Create(index_info); SyncEntireState(); } -RasterizerOpenGL::~RasterizerOpenGL() = default; - -void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_program_manager->LoadDiskCache(stop_loading, callback); -} - -void RasterizerOpenGL::SyncEntireState() { - // Sync fixed function OpenGL state +void RasterizerVulkan::SyncEntireState() { + // Sync fixed function Vulkan state SyncClipEnabled(); SyncCullMode(); SyncBlendEnabled(); @@ -263,7 +181,7 @@ static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4< return (Common::Dot(a, b) < 0.f); } -void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, +void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) { vertex_batch.emplace_back(v0, false); @@ -284,162 +202,6 @@ struct VertexArrayInfo { u32 vs_input_size; }; -RasterizerOpenGL::VertexArrayInfo 
RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Pica::g_state.regs; - const auto& vertex_attributes = regs.pipeline.vertex_attributes; - - u32 vertex_min; - u32 vertex_max; - if (is_indexed) { - const auto& index_info = regs.pipeline.index_array; - const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; - const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); - const u16* index_address_16 = reinterpret_cast(index_address_8); - const bool index_u16 = index_info.format != 0; - - vertex_min = 0xFFFF; - vertex_max = 0; - const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); - res_cache.FlushRegion(address, size, nullptr); - for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { - const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; - vertex_min = std::min(vertex_min, vertex); - vertex_max = std::max(vertex_max, vertex); - } - } else { - vertex_min = regs.pipeline.vertex_offset; - vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; - } - - const u32 vertex_num = vertex_max - vertex_min + 1; - u32 vs_input_size = 0; - for (const auto& loader : vertex_attributes.attribute_loaders) { - if (loader.component_count != 0) { - vs_input_size += loader.byte_count * vertex_num; - } - } - - return {vertex_min, vertex_max, vs_input_size}; -} - -void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, - GLuint vs_input_index_min, GLuint vs_input_index_max) { - MICROPROFILE_SCOPE(OpenGL_VAO); - const auto& regs = Pica::g_state.regs; - const auto& vertex_attributes = regs.pipeline.vertex_attributes; - PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); - - state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); - state.Apply(); - - std::array enable_attributes{}; - - for (const auto& loader : vertex_attributes.attribute_loaders) { - if 
(loader.component_count == 0 || loader.byte_count == 0) { - continue; - } - - u32 offset = 0; - for (u32 comp = 0; comp < loader.component_count && comp < 12; ++comp) { - u32 attribute_index = loader.GetComponent(comp); - if (attribute_index < 12) { - if (vertex_attributes.GetNumElements(attribute_index) != 0) { - offset = Common::AlignUp( - offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); - - u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); - GLint size = vertex_attributes.GetNumElements(attribute_index); - GLenum type = vs_attrib_types[static_cast( - vertex_attributes.GetFormat(attribute_index))]; - GLsizei stride = loader.byte_count; - glVertexAttribPointer(input_reg, size, type, GL_FALSE, stride, - reinterpret_cast(buffer_offset + offset)); - enable_attributes[input_reg] = true; - - offset += vertex_attributes.GetStride(attribute_index); - } - } else { - // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, - // respectively - offset = Common::AlignUp(offset, 4); - offset += (attribute_index - 11) * 4; - } - } - - PAddr data_addr = - base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); - - u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; - u32 data_size = loader.byte_count * vertex_num; - - res_cache.FlushRegion(data_addr, data_size, nullptr); - std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size); - - array_ptr += data_size; - buffer_offset += data_size; - } - - for (std::size_t i = 0; i < enable_attributes.size(); ++i) { - if (enable_attributes[i] != hw_vao_enabled_attributes[i]) { - if (enable_attributes[i]) { - glEnableVertexAttribArray(static_cast(i)); - } else { - glDisableVertexAttribArray(static_cast(i)); - } - hw_vao_enabled_attributes[i] = enable_attributes[i]; - } - - if (vertex_attributes.IsDefaultAttribute(i)) { - const u32 reg = regs.vs.GetRegisterForAttribute(i); - if (!enable_attributes[reg]) { - const auto& attr = 
Pica::g_state.input_default_attributes.attr[i]; - glVertexAttrib4f(reg, attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), - attr.w.ToFloat32()); - } - } - } -} - -bool RasterizerOpenGL::SetupVertexShader() { - MICROPROFILE_SCOPE(OpenGL_VS); - return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs, - Pica::g_state.vs); -} - -bool RasterizerOpenGL::SetupGeometryShader() { - MICROPROFILE_SCOPE(OpenGL_GS); - const auto& regs = Pica::g_state.regs; - - if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { - LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader"); - return false; - } - - shader_program_manager->UseFixedGeometryShader(regs); - return true; -} - -bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - const auto& regs = Pica::g_state.regs; - if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { - if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { - return false; - } - if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { - return false; - } - } - - if (!SetupVertexShader()) - return false; - - if (!SetupGeometryShader()) - return false; - - return Draw(true, is_indexed); -} - static GLenum GetCurrentPrimitiveMode() { const auto& regs = Pica::g_state.regs; switch (regs.pipeline.triangle_topology) { @@ -455,62 +217,13 @@ static GLenum GetCurrentPrimitiveMode() { } } -bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) { - const auto& regs = Pica::g_state.regs; - GLenum primitive_mode = GetCurrentPrimitiveMode(); - - auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed); - - if (vs_input_size > VERTEX_BUFFER_SIZE) { - LOG_WARNING(Render_OpenGL, "Too large vertex input size {}", vs_input_size); - return false; - } - - state.draw.vertex_buffer = vertex_buffer.GetHandle(); - state.Apply(); - - u8* buffer_ptr; - GLintptr buffer_offset; - std::tie(buffer_ptr, buffer_offset, 
std::ignore) = vertex_buffer.Map(vs_input_size, 4); - SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max); - vertex_buffer.Unmap(vs_input_size); - - shader_program_manager->ApplyTo(state); - state.Apply(); - - if (is_indexed) { - bool index_u16 = regs.pipeline.index_array.format != 0; - std::size_t index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); - - if (index_buffer_size > INDEX_BUFFER_SIZE) { - LOG_WARNING(Render_OpenGL, "Too large index input size {}", index_buffer_size); - return false; - } - - const u8* index_data = VideoCore::g_memory->GetPhysicalPointer( - regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + - regs.pipeline.index_array.offset); - std::tie(buffer_ptr, buffer_offset, std::ignore) = index_buffer.Map(index_buffer_size, 4); - std::memcpy(buffer_ptr, index_data, index_buffer_size); - index_buffer.Unmap(index_buffer_size); - - glDrawRangeElementsBaseVertex( - primitive_mode, vs_input_index_min, vs_input_index_max, regs.pipeline.num_vertices, - index_u16 ? 
GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, - reinterpret_cast(buffer_offset), -static_cast(vs_input_index_min)); - } else { - glDrawArrays(primitive_mode, 0, regs.pipeline.num_vertices); - } - return true; -} - -void RasterizerOpenGL::DrawTriangles() { +void RasterizerVulkan::DrawTriangles() { if (vertex_batch.empty()) return; Draw(false, false); } -bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { +bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { MICROPROFILE_SCOPE(OpenGL_Drawing); const auto& regs = Pica::g_state.regs; @@ -520,21 +233,13 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE || - state.color_mask.green_enabled == GL_TRUE || - state.color_mask.blue_enabled == GL_TRUE || - state.color_mask.alpha_enabled == GL_TRUE; - - const bool write_depth_fb = - (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || - (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + const bool write_depth_fb = state.DepthTestEnabled() || (has_stencil && state.StencilTestEnabled()); const bool using_color_fb = - regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0; const bool using_depth_fb = !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && - (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || - (has_stencil && state.stencil.test_enabled)); + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0); Common::Rectangle viewport_rect_unscaled{ // These registers hold half-width and half-height, so must be multiplied by 2 @@ -548,8 +253,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { 
regs.rasterizer.viewport_corner.y // bottom }; - Surface color_surface; - Surface depth_surface; + Surface color_surface, depth_surface; Common::Rectangle surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); @@ -573,51 +277,12 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces - state.draw.draw_framebuffer = framebuffer.handle; - state.Apply(); - - if (shadow_rendering) { - if (!allow_shadow || color_surface == nullptr) { - return true; - } - glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH, - color_surface->width * color_surface->res_scale); - glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT, - color_surface->height * color_surface->res_scale); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - state.image_shadow_buffer = color_surface->texture.handle; - } else { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? 
color_surface->texture.handle : 0, 0); - if (depth_surface != nullptr) { - if (has_stencil) { - // attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, depth_surface->texture.handle, 0); - } else { - // attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); - // clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - } - } else { - // clear both depth and stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - } - } + state.PushRenderTargets(&color_surface->texture, &depth_surface->texture); // Sync the viewport - state.viewport.x = - static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale; - state.viewport.y = - static_cast(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale; - state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); - state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); + vk::Viewport viewport(0, 0, viewport_rect_unscaled.GetWidth() * res_scale, + viewport_rect_unscaled.GetHeight() * res_scale); + state.SetViewport(viewport); if (uniform_block_data.data.framebuffer_scale != res_scale) { uniform_block_data.data.framebuffer_scale = res_scale; @@ -650,8 +315,8 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } bool need_duplicate_texture = false; - auto CheckBarrier = [&need_duplicate_texture, &color_surface](GLuint handle) { - if (color_surface && color_surface->texture.handle == handle) { + auto CheckBarrier = [&need_duplicate_texture, &color_surface](VKTexture* handle) { + if (color_surface && &color_surface->texture == handle) { need_duplicate_texture = true; } }; @@ -662,111 +327,10 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const auto& texture = 
pica_textures[texture_index]; if (texture.enabled) { - if (texture_index == 0) { - using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; - switch (texture.config.type.Value()) { - case TextureType::Shadow2D: { - if (!allow_shadow) - continue; - - Surface surface = res_cache.GetTextureSurface(texture); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); - } else { - state.image_shadow_texture_px = 0; - } - continue; - } - case TextureType::ShadowCube: { - if (!allow_shadow) - continue; - Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister( - texture.config, texture.format); - Surface surface; - - using CubeFace = Pica::TexturingRegs::CubeFace; - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); - surface = res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); - } else { - state.image_shadow_texture_px = 0; - } - - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); - surface = res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_nx = surface->texture.handle); - } else { - state.image_shadow_texture_nx = 0; - } - - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); - surface = res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_py = surface->texture.handle); - } else { - state.image_shadow_texture_py = 0; - } - - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); - surface = res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_ny = surface->texture.handle); - } else { - state.image_shadow_texture_ny = 0; - } - - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); - surface 
= res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_pz = surface->texture.handle); - } else { - state.image_shadow_texture_pz = 0; - } - - info.physical_address = - regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); - surface = res_cache.GetTextureSurface(info); - if (surface != nullptr) { - CheckBarrier(state.image_shadow_texture_nz = surface->texture.handle); - } else { - state.image_shadow_texture_nz = 0; - } - - continue; - } - case TextureType::TextureCube: - using CubeFace = Pica::TexturingRegs::CubeFace; - TextureCubeConfig config; - config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); - config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); - config.py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); - config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); - config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); - config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); - config.width = texture.config.width; - config.format = texture.format; - state.texture_cube_unit.texture_cube = - res_cache.GetTextureCube(config).texture.handle; - - texture_cube_sampler.SyncWithConfig(texture.config); - state.texture_units[texture_index].texture_2d = 0; - continue; // Texture unit 0 setup finished. 
Continue to next unit - } - state.texture_cube_unit.texture_cube = 0; - } - - texture_samplers[texture_index].SyncWithConfig(texture.config); + //texture_samplers[texture_index].SyncWithConfig(texture.config); Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - CheckBarrier(state.texture_units[texture_index].texture_2d = - surface->texture.handle); + state.SetTexture(BindingID::Tex0 + texture_index, &surface->texture); } else { // Can occur when texture addr is null or its memory is unmapped/invalid // HACK: In this case, the correct behaviour for the PICA is to use the last @@ -775,47 +339,10 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // the geometry in question. // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn // on the male character's face, which in the OpenGL default appear black. - state.texture_units[texture_index].texture_2d = default_texture; + state.UnbindTexture(texture_index); } } else { - state.texture_units[texture_index].texture_2d = 0; - } - } - - OGLTexture temp_tex; - if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) { - // The game is trying to use a surface as a texture and framebuffer at the same time - // which causes unpredictable behavior on the host. - // Making a copy to sample from eliminates this issue and seems to be fairly cheap. 
- temp_tex.Create(); - glBindTexture(GL_TEXTURE_2D, temp_tex.handle); - auto [internal_format, format, type] = GetFormatTuple(color_surface->pixel_format); - OGLTexture::Allocate(GL_TEXTURE_2D, color_surface->max_level + 1, internal_format, format, - type, color_surface->GetScaledWidth(), - color_surface->GetScaledHeight()); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glBindTexture(GL_TEXTURE_2D, state.texture_units[0].texture_2d); - - for (std::size_t level{0}; level <= color_surface->max_level; ++level) { - glCopyImageSubData(color_surface->texture.handle, GL_TEXTURE_2D, level, 0, 0, 0, - temp_tex.handle, GL_TEXTURE_2D, level, 0, 0, 0, - color_surface->GetScaledWidth() >> level, - color_surface->GetScaledHeight() >> level, 1); - } - - for (auto& unit : state.texture_units) { - if (unit.texture_2d == color_surface->texture.handle) { - unit.texture_2d = temp_tex.handle; - } - } - for (auto shadow_unit : {&state.image_shadow_texture_nx, &state.image_shadow_texture_ny, - &state.image_shadow_texture_nz, &state.image_shadow_texture_px, - &state.image_shadow_texture_py, &state.image_shadow_texture_pz}) { - if (*shadow_unit == color_surface->texture.handle) { - *shadow_unit = temp_tex.handle; - } + state.UnbindTexture(texture_index); } } @@ -836,70 +363,64 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // dimensions than our framebuffer sub-rect. 
// Enable scissor test to prevent drawing // outside of the framebuffer region - state.scissor.enabled = true; - state.scissor.x = draw_rect.left; - state.scissor.y = draw_rect.bottom; - state.scissor.width = draw_rect.GetWidth(); - state.scissor.height = draw_rect.GetHeight(); - state.Apply(); + vk::Rect2D scissor(vk::Offset2D(draw_rect.left, draw_rect.bottom), + vk::Extent2D(draw_rect.GetWidth(), draw_rect.GetHeight())); + state.SetScissor(scissor); // Draw the vertex batch bool succeeded = true; - if (accelerate) { - succeeded = AccelerateDrawBatchInternal(is_indexed); - } else { - state.draw.vertex_array = sw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); - shader_program_manager->UseTrivialVertexShader(); - shader_program_manager->UseTrivialGeometryShader(); - shader_program_manager->ApplyTo(state); - state.Apply(); + shader_program_manager->UseTrivialVertexShader(); + shader_program_manager->UseTrivialGeometryShader(); + shader_program_manager->ApplyTo(state); + state.Apply(); - std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); - for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size(); - base_vertex += max_vertices) { - const std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); - const std::size_t vertex_size = vertices * sizeof(HardwareVertex); - u8* vbo; - GLintptr offset; - std::tie(vbo, offset, std::ignore) = - vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); - std::memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); - vertex_buffer.Unmap(vertex_size); - glDrawArrays(GL_TRIANGLES, static_cast(offset / sizeof(HardwareVertex)), - static_cast(vertices)); - } + std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); + for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { + const std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); + const
std::size_t vertex_size = vertices * sizeof(HardwareVertex); + + auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(vertex_size); + std::memcpy(buffer, vertex_batch.data() + base_vertex, vertex_size); + + // Copy the vertex data + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + auto& staging = g_vk_task_scheduler->GetStaging(); + vk::BufferCopy copy_region(offset, 0, vertex_size); + + command_buffer.bindVertexBuffers(0, vertex_buffer.GetBuffer(), {offset}); + command_buffer.copyBuffer(staging.GetBuffer(), vertex_buffer.GetBuffer(), copy_region); + + // Issue a pipeline barrier and draw command + vk::BufferMemoryBarrier barrier { + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eVertexAttributeRead, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + vertex_buffer.GetBuffer(), 0, vertex_size + }; + + // Add a pipeline barrier for each region modified + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eVertexInput, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); + + command_buffer.draw(vertices, 1, 0, 0); } vertex_batch.clear(); // Reset textures in rasterizer state context because the rasterizer cache might delete them for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { - state.texture_units[texture_index].texture_2d = 0; + state.UnbindTexture(texture_index); } - state.texture_cube_unit.texture_cube = 0; - if (allow_shadow) { - state.image_shadow_texture_px = 0; - state.image_shadow_texture_nx = 0; - state.image_shadow_texture_py = 0; - state.image_shadow_texture_ny = 0; - state.image_shadow_texture_pz = 0; - state.image_shadow_texture_nz = 0; - state.image_shadow_buffer = 0; - } - state.Apply(); - if (shadow_rendering) { - glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | - GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); - } + state.Apply(); // Mark framebuffer 
surfaces as dirty Common::Rectangle draw_rect_unscaled{draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, draw_rect.bottom / res_scale}; - if (color_surface != nullptr && write_color_fb) { + if (color_surface != nullptr) { auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), color_surface); @@ -913,7 +434,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { return succeeded; } -void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { +void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) { const auto& regs = Pica::g_state.regs; switch (id) { @@ -949,10 +470,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Blending case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): - if (GLES) { + //if (GLES) { // With GLES, we need this in the fragment shader to emulate logic operations - shader_dirty = true; - } + // shader_dirty = true; + //} SyncBlendEnabled(); break; case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): @@ -1073,10 +594,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Logic op case PICA_REG_INDEX(framebuffer.output_merger.logic_op): - if (GLES) { + //if (GLES) { // With GLES, we need this in the fragment shader to emulate logic operations - shader_dirty = true; - } + // shader_dirty = true; + //} SyncLogicOp(); break; @@ -1411,32 +932,32 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { } } -void RasterizerOpenGL::FlushAll() { +void RasterizerVulkan::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushAll(); } -void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { +void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { +void 
RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); res_cache.InvalidateRegion(addr, size, nullptr); } -void RasterizerOpenGL::ClearAll(bool flush) { +void RasterizerVulkan::ClearAll(bool flush) { res_cache.ClearAll(flush); } -bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); SurfaceParams src_params; @@ -1487,7 +1008,7 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe return true; } -bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { +bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); if (copy_size == 0) { return false; @@ -1572,7 +1093,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon return true; } -bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { +bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { Surface dst_surface = res_cache.GetFillSurface(config); if (dst_surface == nullptr) return false; @@ -1581,7 +1102,7 @@ bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) return true; } -bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, +bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { if (framebuffer_addr == 0) { @@ 
-1614,97 +1135,22 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - screen_info.display_texture = src_surface->texture.handle; - + screen_info.display_texture = &src_surface->texture; return true; } -void RasterizerOpenGL::SamplerInfo::Create() { - sampler.Create(); - mag_filter = min_filter = mip_filter = TextureConfig::Linear; - wrap_s = wrap_t = TextureConfig::Repeat; - border_color = 0; - lod_min = lod_max = 0; - lod_bias = 0; - // default is 1000 and -1000 - // Other attributes have correct defaults - glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast(lod_max)); - glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast(lod_min)); -} - -void RasterizerOpenGL::SamplerInfo::SyncWithConfig( - const Pica::TexturingRegs::TextureConfig& config) { - - GLuint s = sampler.handle; - - if (mag_filter != config.mag_filter) { - mag_filter = config.mag_filter; - glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureMagFilterMode(mag_filter)); - } - - // TODO(wwylele): remove new_supress_mipmap_for_cube logic once mipmap for cube is implemented - bool new_supress_mipmap_for_cube = - config.type == Pica::TexturingRegs::TextureConfig::TextureCube; - if (min_filter != config.min_filter || mip_filter != config.mip_filter || - supress_mipmap_for_cube != new_supress_mipmap_for_cube) { - min_filter = config.min_filter; - mip_filter = config.mip_filter; - supress_mipmap_for_cube = new_supress_mipmap_for_cube; - if (new_supress_mipmap_for_cube) { - // HACK: use mag filter converter for min filter because they are the same anyway - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, - PicaToGL::TextureMagFilterMode(min_filter)); - } else { - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, - PicaToGL::TextureMinFilterMode(min_filter, 
mip_filter)); - } - } - - if (wrap_s != config.wrap_s) { - wrap_s = config.wrap_s; - glSamplerParameteri(s, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s)); - } - if (wrap_t != config.wrap_t) { - wrap_t = config.wrap_t; - glSamplerParameteri(s, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t)); - } - - if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { - if (border_color != config.border_color.raw) { - border_color = config.border_color.raw; - auto gl_color = PicaToGL::ColorRGBA8(border_color); - glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data()); - } - } - - if (lod_min != config.lod.min_level) { - lod_min = config.lod.min_level; - glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, static_cast(lod_min)); - } - - if (lod_max != config.lod.max_level) { - lod_max = config.lod.max_level; - glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, static_cast(lod_max)); - } - - if (!GLES && lod_bias != config.lod.bias) { - lod_bias = config.lod.bias; - glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias / 256.0f); - } -} - -void RasterizerOpenGL::SetShader() { +void RasterizerVulkan::SetShader() { shader_program_manager->UseFragmentShader(Pica::g_state.regs); } -void RasterizerOpenGL::SyncClipEnabled() { +void RasterizerVulkan::SyncClipEnabled() { state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; } -void RasterizerOpenGL::SyncClipCoef() { +void RasterizerVulkan::SyncClipCoef() { const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); - const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + const glm::vec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; if (new_clip_coef != uniform_block_data.data.clip_coef) { uniform_block_data.data.clip_coef = new_clip_coef; @@ -1712,33 +1158,33 @@ void RasterizerOpenGL::SyncClipCoef() { } } -void RasterizerOpenGL::SyncCullMode() { +void 
RasterizerVulkan::SyncCullMode() { const auto& regs = Pica::g_state.regs; switch (regs.rasterizer.cull_mode) { case Pica::RasterizerRegs::CullMode::KeepAll: - state.cull.enabled = false; + state.SetCullMode(vk::CullModeFlagBits::eNone); break; case Pica::RasterizerRegs::CullMode::KeepClockWise: - state.cull.enabled = true; - state.cull.front_face = GL_CW; + state.SetCullMode(vk::CullModeFlagBits::eBack); + state.SetFrontFace(vk::FrontFace::eClockwise); break; case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: - state.cull.enabled = true; - state.cull.front_face = GL_CCW; + state.SetCullMode(vk::CullModeFlagBits::eBack); + state.SetFrontFace(vk::FrontFace::eCounterClockwise); break; default: - LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}", + LOG_CRITICAL(Render_Vulkan, "Unknown cull mode {}", static_cast(regs.rasterizer.cull_mode.Value())); UNIMPLEMENTED(); break; } } -void RasterizerOpenGL::SyncDepthScale() { +void RasterizerVulkan::SyncDepthScale() { float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32(); if (depth_scale != uniform_block_data.data.depth_scale) { @@ -1747,7 +1193,7 @@ void RasterizerOpenGL::SyncDepthScale() { } } -void RasterizerOpenGL::SyncDepthOffset() { +void RasterizerVulkan::SyncDepthOffset() { float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32(); if (depth_offset != uniform_block_data.data.depth_offset) { @@ -1756,36 +1202,28 @@ void RasterizerOpenGL::SyncDepthOffset() { } } -void RasterizerOpenGL::SyncBlendEnabled() { - state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1); +void RasterizerVulkan::SyncBlendEnabled() { + state.SetBlendEnable(Pica::g_state.regs.framebuffer.output_merger.alphablend_enable); } -void RasterizerOpenGL::SyncBlendFuncs() { +void RasterizerVulkan::SyncBlendFuncs() { const auto& regs = Pica::g_state.regs; - state.blend.rgb_equation = - 
PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); - state.blend.a_equation = - PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); - state.blend.src_rgb_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); - state.blend.dst_rgb_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); - state.blend.src_a_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); - state.blend.dst_a_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); + auto rgb_op = PicaToVK::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + auto alpha_op = PicaToVK::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + auto src_color = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + auto dst_color = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + auto src_alpha = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); + auto dst_alpha = PicaToVK::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); + + state.SetBlendOp(rgb_op, alpha_op, src_color, dst_color, src_alpha, dst_alpha); } -void RasterizerOpenGL::SyncBlendColor() { - auto blend_color = - PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); - state.blend.color.red = blend_color[0]; - state.blend.color.green = blend_color[1]; - state.blend.color.blue = blend_color[2]; - state.blend.color.alpha = blend_color[3]; +void RasterizerVulkan::SyncBlendColor() { + auto blend_color = PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); + state.SetBlendCostants(blend_color.r, blend_color.g, blend_color.b, blend_color.a); } -void RasterizerOpenGL::SyncFogColor() { +void 
RasterizerVulkan::SyncFogColor() { const auto& regs = Pica::g_state.regs; uniform_block_data.data.fog_color = { regs.texturing.fog_color.r.Value() / 255.0f, @@ -1795,7 +1233,7 @@ void RasterizerOpenGL::SyncFogColor() { uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncProcTexNoise() { +void RasterizerVulkan::SyncProcTexNoise() { const auto& regs = Pica::g_state.regs.texturing; uniform_block_data.data.proctex_noise_f = { Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(), @@ -1813,7 +1251,7 @@ void RasterizerOpenGL::SyncProcTexNoise() { uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncProcTexBias() { +void RasterizerVulkan::SyncProcTexBias() { const auto& regs = Pica::g_state.regs.texturing; uniform_block_data.data.proctex_bias = Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8)) @@ -1822,7 +1260,7 @@ void RasterizerOpenGL::SyncProcTexBias() { uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncAlphaTest() { +void RasterizerVulkan::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; @@ -1830,101 +1268,76 @@ void RasterizerOpenGL::SyncAlphaTest() { } } -void RasterizerOpenGL::SyncLogicOp() { +void RasterizerVulkan::SyncLogicOp() { const auto& regs = Pica::g_state.regs; - state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op); - - if (GLES) { - if (!regs.framebuffer.output_merger.alphablend_enable) { - if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { - // Color output is disabled by logic operation. We use color write mask to skip - // color but allow depth write. 
- state.color_mask = {}; - } - } - } + state.SetLogicOp(PicaToVK::LogicOp(regs.framebuffer.output_merger.logic_op)); } -void RasterizerOpenGL::SyncColorWriteMask() { +void RasterizerVulkan::SyncColorWriteMask() { const auto& regs = Pica::g_state.regs; - if (GLES) { - if (!regs.framebuffer.output_merger.alphablend_enable) { - if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { - // Color output is disabled by logic operation. We use color write mask to skip - // color but allow depth write. Return early to avoid overwriting this. - return; - } - } - } - auto IsColorWriteEnabled = [&](u32 value) { - return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE - : GL_FALSE; + auto WriteEnabled = [&](u32 value) { + return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0; }; - state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable); - state.color_mask.green_enabled = - IsColorWriteEnabled(regs.framebuffer.output_merger.green_enable); - state.color_mask.blue_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.blue_enable); - state.color_mask.alpha_enabled = - IsColorWriteEnabled(regs.framebuffer.output_merger.alpha_enable); + state.SetColorMask(WriteEnabled(regs.framebuffer.output_merger.red_enable), + WriteEnabled(regs.framebuffer.output_merger.green_enable), + WriteEnabled(regs.framebuffer.output_merger.blue_enable), + WriteEnabled(regs.framebuffer.output_merger.alpha_enable)); } -void RasterizerOpenGL::SyncStencilWriteMask() { +void RasterizerVulkan::SyncStencilWriteMask() { const auto& regs = Pica::g_state.regs; - state.stencil.write_mask = - (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) - ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0; + state.SetStencilWrite((regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? 
regs.framebuffer.output_merger.stencil_test.write_mask + : 0); } -void RasterizerOpenGL::SyncDepthWriteMask() { +void RasterizerVulkan::SyncDepthWriteMask() { const auto& regs = Pica::g_state.regs; - state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && - regs.framebuffer.output_merger.depth_write_enable) - ? GL_TRUE - : GL_FALSE; + state.SetDepthWrite(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); } -void RasterizerOpenGL::SyncStencilTest() { +void RasterizerVulkan::SyncStencilTest() { const auto& regs = Pica::g_state.regs; - state.stencil.test_enabled = - regs.framebuffer.output_merger.stencil_test.enable && - regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - state.stencil.test_func = - PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); - state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value; - state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask; - state.stencil.action_stencil_fail = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); - state.stencil.action_depth_fail = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); - state.stencil.action_depth_pass = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); + + bool enabled = regs.framebuffer.output_merger.stencil_test.enable && + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + auto func = PicaToVK::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); + auto ref = regs.framebuffer.output_merger.stencil_test.reference_value; + auto mask = regs.framebuffer.output_merger.stencil_test.input_mask; + auto stencil_fail = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); + auto depth_fail = 
PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); + auto depth_pass = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); + + state.SetStencilTest(enabled, stencil_fail, depth_pass, depth_fail, func, ref); + state.SetStencilInput(mask); } -void RasterizerOpenGL::SyncDepthTest() { +void RasterizerVulkan::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || - regs.framebuffer.output_merger.depth_write_enable == 1; - state.depth.test_func = - regs.framebuffer.output_merger.depth_test_enable == 1 - ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func) - : GL_ALWAYS; + bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + auto test_func = regs.framebuffer.output_merger.depth_test_enable == 1 + ? PicaToVK::CompareFunc(regs.framebuffer.output_merger.depth_test_func) + : vk::CompareOp::eAlways; + + state.SetDepthTest(test_enabled, test_func); } -void RasterizerOpenGL::SyncCombinerColor() { +void RasterizerVulkan::SyncCombinerColor() { auto combiner_color = - PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); + PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw); if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { uniform_block_data.data.tev_combiner_buffer_color = combiner_color; uniform_block_data.dirty = true; } } -void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index, +void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) { - const auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); + const auto const_color = PicaToVK::ColorRGBA8(tev_stage.const_color); if (const_color == uniform_block_data.data.const_color[stage_index]) { return; @@ -1934,48 
+1347,48 @@ void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index, uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncGlobalAmbient() { - auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); +void RasterizerVulkan::SyncGlobalAmbient() { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient); if (color != uniform_block_data.data.lighting_global_ambient) { uniform_block_data.data.lighting_global_ambient = color; uniform_block_data.dirty = true; } } -void RasterizerOpenGL::SyncLightSpecular0(int light_index) { - auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); +void RasterizerVulkan::SyncLightSpecular0(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); if (color != uniform_block_data.data.light_src[light_index].specular_0) { uniform_block_data.data.light_src[light_index].specular_0 = color; uniform_block_data.dirty = true; } } -void RasterizerOpenGL::SyncLightSpecular1(int light_index) { - auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); +void RasterizerVulkan::SyncLightSpecular1(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); if (color != uniform_block_data.data.light_src[light_index].specular_1) { uniform_block_data.data.light_src[light_index].specular_1 = color; uniform_block_data.dirty = true; } } -void RasterizerOpenGL::SyncLightDiffuse(int light_index) { - auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); +void RasterizerVulkan::SyncLightDiffuse(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); if (color != uniform_block_data.data.light_src[light_index].diffuse) { uniform_block_data.data.light_src[light_index].diffuse = color; uniform_block_data.dirty = true; } } -void 
RasterizerOpenGL::SyncLightAmbient(int light_index) { - auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); +void RasterizerVulkan::SyncLightAmbient(int light_index) { + auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); if (color != uniform_block_data.data.light_src[light_index].ambient) { uniform_block_data.data.light_src[light_index].ambient = color; uniform_block_data.dirty = true; } } -void RasterizerOpenGL::SyncLightPosition(int light_index) { - GLvec3 position = { +void RasterizerVulkan::SyncLightPosition(int light_index) { + glm::vec3 position = { Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()}; @@ -1986,9 +1399,9 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { } } -void RasterizerOpenGL::SyncLightSpotDirection(int light_index) { +void RasterizerVulkan::SyncLightSpotDirection(int light_index) { const auto& light = Pica::g_state.regs.lighting.light[light_index]; - GLvec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f, + glm::vec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f}; if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { @@ -1997,7 +1410,7 @@ void RasterizerOpenGL::SyncLightSpotDirection(int light_index) { } } -void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { +void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) { GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) .ToFloat32(); @@ -2008,7 +1421,7 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { } } -void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { 
+void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) { GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale) .ToFloat32(); @@ -2019,7 +1432,7 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { } } -void RasterizerOpenGL::SyncShadowBias() { +void RasterizerVulkan::SyncShadowBias() { const auto& shadow = Pica::g_state.regs.framebuffer.shadow; GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32(); GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32(); @@ -2032,7 +1445,7 @@ void RasterizerOpenGL::SyncShadowBias() { } } -void RasterizerOpenGL::SyncShadowTextureBias() { +void RasterizerVulkan::SyncShadowTextureBias() { GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1; if (bias != uniform_block_data.data.shadow_texture_bias) { uniform_block_data.data.shadow_texture_bias = bias; @@ -2040,74 +1453,102 @@ void RasterizerOpenGL::SyncShadowTextureBias() { } } -void RasterizerOpenGL::SyncAndUploadLUTsLF() { - constexpr std::size_t max_size = - sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog +void RasterizerVulkan::SyncAndUploadLUTsLF() { + constexpr u32 sampler_size = sizeof(glm::vec2) * 256; + constexpr u32 fog_size = sizeof(glm::vec2) * 128; + constexpr u32 max_size = sampler_size * Pica::LightingRegs::NumLightingSampler + fog_size; if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { return; } - u8* buffer; - GLintptr offset; - bool invalidate; - std::size_t bytes_used = 0; - glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle()); - std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4)); + u32 copy_region_count = 0; + std::array regions{}; + auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(max_size); // Sync the lighting luts - if (uniform_block_data.lighting_lut_dirty_any || invalidate) { - for 
(unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { - if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { - std::array new_data; + auto copy_func = [](const auto& entry) { return glm::vec2{entry.ToFloat(), entry.DiffToFloat()}; }; + if (uniform_block_data.lighting_lut_dirty_any) { + for (u32 index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index]) { const auto& source_lut = Pica::g_state.lighting.luts[index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), - [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - if (new_data != lighting_lut_data[index] || invalidate) { + std::array new_data; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), copy_func); + + if (new_data != lighting_lut_data[index]) { lighting_lut_data[index] = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(GLvec2)); + + // Copy updated data to staging buffer + u32 byte_offset = sampler_size * index; + std::memcpy(buffer + byte_offset, new_data.data(), new_data.size() * sizeof(glm::vec2)); + + // TODO: Not needed with vulkan barriers, remove in the future when I touch the shaders uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = - static_cast((offset + bytes_used) / sizeof(GLvec2)); + byte_offset / sizeof(glm::vec2); uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(GLvec2); + + // Queue copy operation + regions[copy_region_count++] = vk::BufferCopy(offset + byte_offset, byte_offset, sampler_size); } uniform_block_data.lighting_lut_dirty[index] = false; } } + uniform_block_data.lighting_lut_dirty_any = false; } // Sync the fog lut - if (uniform_block_data.fog_lut_dirty || invalidate) { - std::array new_data; + if (uniform_block_data.fog_lut_dirty) { + std::array new_data; + std::transform(Pica::g_state.fog.lut.begin(), 
Pica::g_state.fog.lut.end(), new_data.begin(), copy_func); - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), - [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != fog_lut_data || invalidate) { + if (new_data != fog_lut_data) { fog_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); - uniform_block_data.data.fog_lut_offset = - static_cast((offset + bytes_used) / sizeof(GLvec2)); + + // Copy updated fog data to staging buffer + auto fog_offset = max_size - fog_size; + std::memcpy(buffer + fog_offset, new_data.data(), new_data.size() * sizeof(glm::vec2)); + uniform_block_data.data.fog_lut_offset = fog_offset / sizeof(glm::vec2); uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(GLvec2); + + // Queue copy operation + regions[copy_region_count++] = vk::BufferCopy(offset + fog_offset, fog_offset, fog_size); } + uniform_block_data.fog_lut_dirty = false; } - texture_lf_buffer.Unmap(bytes_used); + // Perform copy operation + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + auto& staging = g_vk_task_scheduler->GetStaging(); + command_buffer.copyBuffer(staging.GetBuffer(), texture_buffer_lut_lf.GetBuffer(), + copy_region_count, regions.data()); + + std::array barriers {{{ + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + texture_buffer_lut_lf.GetBuffer(), {}, {} + }}}; + + // Add a pipeline barrier for each region modified + for (int i = 0; i < copy_region_count; i++) { + auto& region = regions[i]; + auto& barrier = barriers[i]; + + barrier.setOffset(region.dstOffset); + barrier.setSize(region.size); + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); + } } -void
RasterizerOpenGL::SyncAndUploadLUTs() { - constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha - sizeof(GLvec4) * 256 + // proctex - sizeof(GLvec4) * 256; // proctex diff +void RasterizerVulkan::SyncAndUploadLUTs() { + constexpr std::size_t max_size = sizeof(glm::vec2) * 128 * 3 + // proctex: noise + color + alpha + sizeof(glm::vec4) * 256 + // proctex + sizeof(glm::vec4) * 256; // proctex diff if (!uniform_block_data.proctex_noise_lut_dirty && !uniform_block_data.proctex_color_map_dirty && @@ -2116,137 +1557,183 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { return; } - u8* buffer; - GLintptr offset; - bool invalidate; - std::size_t bytes_used = 0; - glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); - std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); + u32 copy_region_count = 0; + std::array regions{}; + auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(max_size); // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap - auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( - const std::array& lut, - std::array& lut_data, GLint& lut_offset) { - std::array new_data; + auto SyncLUT = [&, &buffer = buffer, &offset = offset](const auto& lut, auto& lut_data, int lut_offset) { + std::array new_data; std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + return glm::vec2{entry.ToFloat(), entry.DiffToFloat()}; }); - if (new_data != lut_data || invalidate) { + if (new_data != lut_data) { lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); - lut_offset = static_cast((offset + bytes_used) / sizeof(GLvec2)); + + auto data_size = new_data.size() * sizeof(glm::vec2); + std::memcpy(buffer + lut_offset, new_data.data(), data_size); uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(GLvec2); + + // 
Queue copy operation + regions[copy_region_count++] = vk::BufferCopy(offset + lut_offset, lut_offset, data_size); } }; // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - uniform_block_data.data.proctex_noise_lut_offset); + if (uniform_block_data.proctex_noise_lut_dirty) { + uniform_block_data.data.proctex_noise_lut_offset = 0; + SyncLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + if (uniform_block_data.proctex_color_map_dirty) { + uniform_block_data.data.proctex_color_map_offset = sizeof(glm::vec2) * 128; + SyncLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { - SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + if (uniform_block_data.proctex_alpha_map_dirty) { + uniform_block_data.data.proctex_alpha_map_offset = sizeof(glm::vec2) * 128 * 2; + SyncLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; } // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty || invalidate) { - std::array new_data; + auto rgba_func = [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return glm::vec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }; + if (uniform_block_data.proctex_lut_dirty) { + uniform_block_data.data.proctex_lut_offset = sizeof(glm::vec2) * 128 * 3; 
+ + std::array new_data; std::transform(Pica::g_state.proctex.color_table.begin(), Pica::g_state.proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); + rgba_func); - if (new_data != proctex_lut_data || invalidate) { + if (new_data != proctex_lut_data) { proctex_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); - uniform_block_data.data.proctex_lut_offset = - static_cast((offset + bytes_used) / sizeof(GLvec4)); + + auto offset = uniform_block_data.data.proctex_lut_offset; + auto data_size = new_data.size() * sizeof(glm::vec4); + std::memcpy(buffer + offset, new_data.data(), data_size); + + regions[copy_region_count++] = vk::BufferCopy(offset, offset, data_size); uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(GLvec4); } + uniform_block_data.proctex_lut_dirty = false; } // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { - std::array new_data; + if (uniform_block_data.proctex_diff_lut_dirty) { + uniform_block_data.data.proctex_diff_lut_offset = sizeof(glm::vec2) * 128 * 3 + sizeof(glm::vec4) * 256; + std::array new_data; std::transform(Pica::g_state.proctex.color_diff_table.begin(), Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); + rgba_func); - if (new_data != proctex_diff_lut_data || invalidate) { + if (new_data != proctex_diff_lut_data) { proctex_diff_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); - uniform_block_data.data.proctex_diff_lut_offset = - static_cast((offset + bytes_used) / sizeof(GLvec4)); + + auto offset = uniform_block_data.data.proctex_diff_lut_offset; + auto data_size = new_data.size() * 
sizeof(glm::vec4); + std::memcpy(buffer + offset, new_data.data(), data_size); + + regions[copy_region_count++] = vk::BufferCopy(offset, offset, data_size); uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(GLvec4); } + uniform_block_data.proctex_diff_lut_dirty = false; } - texture_buffer.Unmap(bytes_used); + // Perform copy operation + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + auto& staging = g_vk_task_scheduler->GetStaging(); + command_buffer.copyBuffer(staging.GetBuffer(), texture_buffer_lut.GetBuffer(), + copy_region_count, regions.data()); + + std::array barriers {{{ + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + texture_buffer_lut.GetBuffer(), {}, {} + }}}; + + // Add a pipeline barrier for each region modified + for (int i = 0; i < copy_region_count; i++) { + auto& region = regions[i]; + auto& barrier = barriers[i]; + + barrier.setOffset(region.dstOffset); + barrier.setSize(region.size); + command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); + } } -void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { - // glBindBufferRange below also changes the generic buffer binding point, so we sync the state - // first - state.draw.uniform_buffer = uniform_buffer.GetHandle(); - state.Apply(); - +void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { + u32 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + u32 vs_uniform_size = sizeof(VSUniformData); bool sync_vs = accelerate_draw; bool sync_fs = uniform_block_data.dirty; - if (!sync_vs && !sync_fs) - return; - - std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; - std::size_t used_bytes = 0; - u8* uniforms; - GLintptr offset; - bool invalidate; - std::tie(uniforms, offset, invalidate) = -
uniform_buffer.Map(uniform_size, uniform_buffer_alignment); + u32 copy_region_count = 0; + std::array regions{}; + auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(uniform_size); if (sync_vs) { VSUniformData vs_uniforms; vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); - std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - glBindBufferRange(GL_UNIFORM_BUFFER, static_cast(UniformBindings::VS), - uniform_buffer.GetHandle(), offset + used_bytes, sizeof(VSUniformData)); - used_bytes += uniform_size_aligned_vs; + + std::memcpy(buffer, &vs_uniforms, vs_uniform_size); + regions[copy_region_count++] = vk::BufferCopy(offset, 0, vs_uniform_size); } - if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(UniformData)); - glBindBufferRange(GL_UNIFORM_BUFFER, static_cast(UniformBindings::Common), - uniform_buffer.GetHandle(), offset + used_bytes, sizeof(UniformData)); + if (sync_fs) { uniform_block_data.dirty = false; - used_bytes += uniform_size_aligned_fs; + + std::memcpy(buffer + vs_uniform_size, &uniform_block_data.data, sizeof(UniformData)); + regions[copy_region_count++] = vk::BufferCopy(offset + vs_uniform_size, vs_uniform_size, sizeof(UniformData)); } - uniform_buffer.Unmap(used_bytes); + // Perform copy operation + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + auto& staging = g_vk_task_scheduler->GetStaging(); + command_buffer.copyBuffer(staging.GetBuffer(), uniform_buffer.GetBuffer(), + copy_region_count, regions.data()); + + std::array barriers {{{ + vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eUniformRead, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + uniform_buffer.GetBuffer(), {}, {} + }}}; + + // Add a pipeline barrier for each region modified + for (int i = 0; i < copy_region_count; i++) { + auto& region = regions[i]; + auto& barrier = barriers[i]; + + barrier.setOffset(region.dstOffset); + barrier.setSize(region.size); +
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eVertexShader | + vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); + } } -} // namespace OpenGL +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 9c11dca6e..4483799ad 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,8 @@ #include "video_core/regs_rasterizer.h" #include "video_core/regs_texturing.h" #include "video_core/shader/shader.h" +#include "video_core/renderer_vulkan/vk_state.h" +#include "video_core/renderer_vulkan/vk_rasterizer_cache.h" namespace Frontend { class EmuWindow; @@ -31,10 +33,89 @@ class EmuWindow; namespace Vulkan { class ShaderProgramManager; +enum class UniformBindings : u32 { Common, VS, GS }; + +struct LightSrc { + alignas(16) glm::vec3 specular_0; + alignas(16) glm::vec3 specular_1; + alignas(16) glm::vec3 diffuse; + alignas(16) glm::vec3 ambient; + alignas(16) glm::vec3 position; + alignas(16) glm::vec3 spot_direction; // negated + float dist_atten_bias; + float dist_atten_scale; +}; + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers.
+struct UniformData { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + alignas(16) glm::ivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; + alignas(16) glm::vec3 fog_color; + alignas(8) glm::vec2 proctex_noise_f; + alignas(8) glm::vec2 proctex_noise_a; + alignas(8) glm::vec2 proctex_noise_p; + alignas(16) glm::vec3 lighting_global_ambient; + LightSrc light_src[8]; + alignas(16) glm::vec4 const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) glm::vec4 tev_combiner_buffer_color; + alignas(16) glm::vec4 clip_coef; +}; + +static_assert( + sizeof(UniformData) == 0x4F0, + "The size of the UniformData structure has changed, update the structure in the shader"); +static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + +/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. +// NOTE: the same rule from UniformData also applies here. 
+struct PicaUniformsData { + void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup); + + struct BoolAligned { + alignas(16) int b; + }; + + std::array bools; + alignas(16) std::array i; + alignas(16) std::array f; +}; + +struct VSUniformData { + PicaUniformsData uniforms; +}; +static_assert( + sizeof(VSUniformData) == 1856, + "The size of the VSUniformData structure has changed, update the structure in the shader"); +static_assert(sizeof(VSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + +struct ScreenInfo; + class RasterizerVulkan : public VideoCore::RasterizerInterface { public: explicit RasterizerVulkan(Frontend::EmuWindow& emu_window); - ~RasterizerVulkan() override; + ~RasterizerVulkan() override = default; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -52,43 +133,17 @@ public: bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, - u32 pixel_stride, OpenGL::ScreenInfo& screen_info) override; + u32 pixel_stride, Vulkan::ScreenInfo& screen_info) override; bool AccelerateDrawBatch(bool is_indexed) override; /// Syncs entire status to match PICA registers void SyncEntireState() override; private: - struct SamplerInfo { - using TextureConfig = Pica::TexturingRegs::TextureConfig; - - OGLSampler sampler; - - /// Creates the sampler object, initializing its state so that it's in sync with the - /// SamplerInfo struct. - void Create(); - /// Syncs the sampler object with the config, updating any necessary state. 
- void SyncWithConfig(const TextureConfig& config); - - private: - TextureConfig::TextureFilter mag_filter; - TextureConfig::TextureFilter min_filter; - TextureConfig::TextureFilter mip_filter; - TextureConfig::WrapMode wrap_s; - TextureConfig::WrapMode wrap_t; - u32 border_color; - u32 lod_min; - u32 lod_max; - s32 lod_bias; - - // TODO(wwylele): remove this once mipmap for cube is implemented - bool supress_mipmap_for_cube = false; - }; - - struct VertexInfo + struct VertexBase { - VertexInfo() = default; - VertexInfo(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { + VertexBase() = default; + VertexBase(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); position[2] = v.pos.z.ToFloat32(); @@ -128,21 +183,21 @@ private: }; /// Structure that the hardware rendered vertices are composed of - struct HardwareVertex : public VertexInfo + struct HardwareVertex : public VertexBase { HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexInfo(v, flip_quaternion) {}; - static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexInfo)); + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexBase(v, flip_quaternion) {}; + static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexBase)); static constexpr std::array attribute_desc = { - vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, position)), - vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, color)), - vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord0)), - vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, tex_coord1)), - vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexInfo, 
tex_coord2)), - vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexInfo, tex_coord0_w)), - vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexInfo, normquat)), - vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexInfo, view)), + vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, position)), + vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, color)), + vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord0)), + vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord1)), + vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord2)), + vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexBase, tex_coord0_w)), + vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, normquat)), + vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexBase, view)), }; }; @@ -254,30 +309,16 @@ private: /// Generic draw function for DrawTriangles and AccelerateDrawBatch bool Draw(bool accelerate, bool is_indexed); - /// Internal implementation for AccelerateDrawBatch - bool AccelerateDrawBatchInternal(bool is_indexed); - struct VertexArrayInfo { u32 vs_input_index_min; u32 vs_input_index_max; u32 vs_input_size; }; - /// Retrieve the range and the size of the input vertex - VertexArrayInfo AnalyzeVertexArray(bool is_indexed); +private: + VulkanState state; - /// Setup vertex shader for AccelerateDrawBatch - bool SetupVertexShader(); - - /// Setup geometry shader for AccelerateDrawBatch - bool SetupGeometryShader(); - - bool is_amd; - - OpenGLState state; - GLuint default_texture; - - RasterizerCacheOpenGL res_cache; + RasterizerCacheVulkan res_cache; std::vector 
vertex_batch; @@ -304,33 +345,23 @@ private: static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; - OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw - std::array hw_vao_enabled_attributes{}; - std::array texture_samplers; - OGLStreamBuffer vertex_buffer; - OGLStreamBuffer uniform_buffer; - OGLStreamBuffer index_buffer; - OGLStreamBuffer texture_buffer; - OGLStreamBuffer texture_lf_buffer; - OGLFramebuffer framebuffer; - GLint uniform_buffer_alignment; - std::size_t uniform_size_aligned_vs; - std::size_t uniform_size_aligned_fs; + VKBuffer vertex_buffer, uniform_buffer, index_buffer; + VKBuffer texture_buffer_lut_lf, texture_buffer_lut; + + u32 uniform_buffer_alignment; + u32 uniform_size_aligned_vs, uniform_size_aligned_fs; SamplerInfo texture_cube_sampler; - OGLTexture texture_buffer_lut_lf; - OGLTexture texture_buffer_lut_rg; - OGLTexture texture_buffer_lut_rgba; - - std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; - std::array fog_lut_data{}; - std::array proctex_noise_lut_data{}; - std::array proctex_color_map_data{}; - std::array proctex_alpha_map_data{}; - std::array proctex_lut_data{}; - std::array proctex_diff_lut_data{}; + std::array, + Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; + std::array fog_lut_data{}; + std::array proctex_noise_lut_data{}; + std::array proctex_color_map_data{}; + std::array proctex_alpha_map_data{}; + std::array proctex_lut_data{}; + std::array proctex_diff_lut_data{}; bool allow_shadow; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp index 8ef4b8ae2..fabfe55f9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp @@ -38,6 +38,14 @@ namespace Vulkan { using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = 
SurfaceParams::PixelFormat; +static constexpr std::array fb_format_tuples = {{ + vk::Format::eR8G8B8A8Uint, // RGBA8 + vk::Format::eR8G8B8Uint, // RGB8 + vk::Format::eR5G5B5A1UnormPack16, // RGB5A1 + vk::Format::eR5G6B5UnormPack16, // RGB565 + vk::Format::eR4G4B4A4UnormPack16, // RGBA4 +}}; + template static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); @@ -55,13 +63,33 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gpu_buffer) { if constexpr (format == PixelFormat::D24S8) { gpu_ptr[0] = tile_ptr[3]; std::memcpy(gpu_ptr + 1, tile_ptr, 3); - } else { - std::memcpy(gpu_ptr, tile_ptr, bytes_per_pixel); - } + } else if (format == PixelFormat::RGBA8) { + gpu_ptr[0] = tile_ptr[3]; + gpu_ptr[1] = tile_ptr[2]; + gpu_ptr[2] = tile_ptr[1]; + gpu_ptr[3] = tile_ptr[0]; + } else if (format == PixelFormat::RGB8) { + gpu_ptr[0] = tile_ptr[2]; + gpu_ptr[1] = tile_ptr[1]; + gpu_ptr[2] = tile_ptr[0]; + } else { + std::memcpy(gpu_ptr, tile_ptr, bytes_per_pixel); + } } else { if constexpr (format == PixelFormat::D24S8) { std::memcpy(tile_ptr, gpu_ptr + 1, 3); tile_ptr[3] = gpu_ptr[0]; + } else if (format == PixelFormat::RGBA8) { + // because GLES does not have ABGR format + // so we will do byteswapping here + tile_ptr[0] = gpu_ptr[3]; + tile_ptr[1] = gpu_ptr[2]; + tile_ptr[2] = gpu_ptr[1]; + tile_ptr[3] = gpu_ptr[0]; + } else if (format == PixelFormat::RGB8) { + tile_ptr[0] = gpu_ptr[2]; + tile_ptr[1] = gpu_ptr[1]; + tile_ptr[2] = gpu_ptr[0]; } else { std::memcpy(tile_ptr, gpu_ptr, bytes_per_pixel); } @@ -205,76 +233,6 @@ VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 w return texture; } -static bool FillSurface(const Surface& surface, const u8* fill_data, - const Common::Rectangle& fill_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.scissor.enabled = true; - 
state.scissor.x = static_cast(fill_rect.left); - state.scissor.y = static_cast(fill_rect.bottom); - state.scissor.width = static_cast(fill_rect.GetWidth()); - state.scissor.height = static_cast(fill_rect.GetHeight()); - - state.draw.draw_framebuffer = draw_fb_handle; - state.Apply(); - - surface->InvalidateAllWatcher(); - - if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { - Pica::Texture::TextureInfo tex_info{}; - tex_info.format = static_cast(surface->pixel_format); - Common::Vec4 color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info); - - std::array color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f, - color.w / 255.f}; - - state.color_mask.red_enabled = GL_TRUE; - state.color_mask.green_enabled = GL_TRUE; - state.color_mask.blue_enabled = GL_TRUE; - state.color_mask.alpha_enabled = GL_TRUE; - state.Apply(); - glClearBufferfv(GL_COLOR, 0, &color_values[0]); - } else if (surface->type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - surface->texture.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - u32 value_32bit = 0; - GLfloat value_float; - - if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { - std::memcpy(&value_32bit, fill_data, 2); - value_float = value_32bit / 65535.0f; // 2^16 - 1 - } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { - std::memcpy(&value_32bit, fill_data, 3); - value_float = value_32bit / 16777215.0f; // 2^24 - 1 - } - - state.depth.write_mask = GL_TRUE; - state.Apply(); - glClearBufferfv(GL_DEPTH, 0, &value_float); - } else if (surface->type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
GL_TEXTURE_2D, - surface->texture.handle, 0); - - u32 value_32bit; - std::memcpy(&value_32bit, fill_data, sizeof(u32)); - - GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1 - GLint value_int = (value_32bit >> 24); - - state.depth.write_mask = GL_TRUE; - state.stencil.write_mask = -1; - state.Apply(); - glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int); - } - return true; -} - CachedSurface::~CachedSurface() { if (texture.IsValid()) { auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8), @@ -339,23 +297,13 @@ void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surfac // This is only called when CanCopy is true, no need to run checks here if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u32 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array fill_buffer; - - u32 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), - draw_framebuffer.handle); - return; + // NO-OP Vulkan does not allow easy clearing for arbitrary textures with rectangle + printf("bad!"); } if (src_surface->CanSubRect(subrect_params)) { auto srect = src_surface->GetScaledSubRect(subrect_params); auto drect = dst_surface->GetScaledSubRect(subrect_params); - src_surface->texture.BlitTo(srect, dst_surface->texture, drect, src_surface->type); + src_surface->texture.BlitTo(srect, &dst_surface->texture, drect, src_surface->type); return; } @@ -363,7 +311,7 @@ void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surfac } MICROPROFILE_DEFINE(Vulkan_SurfaceLoad, "Vulkan", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { +void CachedSurface::LoadGPUBuffer(PAddr load_start,
PAddr load_end) { ASSERT(type != SurfaceType::Fill); const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr); @@ -371,7 +319,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { return; if (vk_buffer.empty()) { - vk_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format)); + vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); } // TODO: Should probably be done in ::Memory:: and check for other regions too @@ -419,12 +367,12 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { +void CachedSurface::FlushGPUBuffer(PAddr flush_start, PAddr flush_end) { u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); if (dst_buffer == nullptr) return; - ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format)); + ASSERT(vk_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); // TODO: Should probably be done in ::Memory:: and check for other regions too // same as loadglbuffer() @@ -455,135 +403,42 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { if (backup_bytes) std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); - if (pixel_format == PixelFormat::RGBA8 && GLES) { + if (pixel_format == PixelFormat::RGBA8) { for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { - dst_buffer[i] = gl_buffer[i + 3]; - dst_buffer[i + 1] = gl_buffer[i + 2]; - dst_buffer[i + 2] = gl_buffer[i + 1]; - dst_buffer[i + 3] = gl_buffer[i]; + dst_buffer[i] = vk_buffer[i + 3]; + dst_buffer[i + 1] = vk_buffer[i + 2]; + dst_buffer[i + 2] = vk_buffer[i + 1]; + dst_buffer[i + 3] = vk_buffer[i]; } - } else if (pixel_format == PixelFormat::RGB8 && GLES) { + } else if (pixel_format == 
PixelFormat::RGB8) { for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { - dst_buffer[i] = gl_buffer[i + 2]; - dst_buffer[i + 1] = gl_buffer[i + 1]; - dst_buffer[i + 2] = gl_buffer[i]; + dst_buffer[i] = vk_buffer[i + 2]; + dst_buffer[i + 1] = vk_buffer[i + 1]; + dst_buffer[i + 2] = vk_buffer[i]; } } else { - std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], + std::memcpy(dst_buffer + start_offset, &vk_buffer[start_offset], flush_end - flush_start); } } else { - gpu_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], + gpu_to_morton_fns[static_cast(pixel_format)](stride, height, &vk_buffer[0], addr, flush_start, flush_end); } } -bool CachedSurface::LoadCustomTexture(u64 tex_hash) { - auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); - const auto& image_interface = Core::System::GetInstance().GetImageInterface(); - - if (custom_tex_cache.IsTextureCached(tex_hash)) { - custom_tex_info = custom_tex_cache.LookupTexture(tex_hash); - return true; - } - - if (!custom_tex_cache.CustomTextureExists(tex_hash)) { - return false; - } - - const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash); - if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width, - custom_tex_info.height, path_info.path)) { - LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path); - return false; - } - - const std::bitset<32> width_bits(custom_tex_info.width); - const std::bitset<32> height_bits(custom_tex_info.height); - if (width_bits.count() != 1 || height_bits.count() != 1) { - LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path); - return false; - } - - LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path); - Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height); - custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width, - custom_tex_info.height); - return true; 
-} - -void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) { - // Make sure the texture size is a power of 2 - // If not, the surface is actually a framebuffer - std::bitset<32> width_bits(width); - std::bitset<32> height_bits(height); - if (width_bits.count() != 1 || height_bits.count() != 1) { - LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})", - tex_hash, width, height); - return; - } - - // Dump texture to RGBA8 and encode as PNG - const auto& image_interface = Core::System::GetInstance().GetImageInterface(); - auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); - std::string dump_path = - fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), - Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id); - if (!FileUtil::CreateFullPath(dump_path)) { - LOG_ERROR(Render, "Unable to create {}", dump_path); - return; - } - - dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format); - if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) { - custom_tex_cache.SetTextureDumped(tex_hash); - - LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path); - std::vector decoded_texture; - decoded_texture.resize(width * height * 4); - OpenGLState state = OpenGLState::GetCurState(); - GLuint old_texture = state.texture_units[0].texture_2d; - state.Apply(); - /* - GetTexImageOES is used even if not using OpenGL ES to work around a small issue that - happens if using custom textures with texture dumping at the same. - Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a - higher quality 256x256 texture. If the 256x256 texture is displayed first and the - 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture - will appear in the corner of the 256x256 texture. 
If texture dumping is enabled and - the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL - texture is still 256x256, Citra crashes because it thinks the texture is only 32x32. - GetTexImageOES conveniently only dumps the specified region, and works on both - desktop and ES. - */ - // if the backend isn't OpenGL ES, this won't be initialized yet - if (!owner.texture_downloader_es) - owner.texture_downloader_es = std::make_unique(false); - owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, - height, width, &decoded_texture[0]); - state.texture_units[0].texture_2d = old_texture; - state.Apply(); - Common::FlipRGBA8Texture(decoded_texture, width, height); - if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) - LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); - } -} - MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { +void CachedSurface::UploadGPUTexture(Common::Rectangle rect) { if (type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format)); + ASSERT(vk_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); u64 tex_hash = 0; if (Settings::values.dump_textures || Settings::values.custom_textures) { - tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size()); + tex_hash = Common::ComputeHash64(vk_buffer.data(), vk_buffer.size()); } if (Settings::values.custom_textures) { @@ -591,62 +446,49 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_ } // Load data from memory to the surface - GLint x0 = static_cast(rect.left); - GLint y0 = static_cast(rect.bottom); - std::size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + int x0 = static_cast(rect.left); + int y0 = 
static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format); - GLuint target_tex = texture.handle; + //const FormatTuple& tuple = GetFormatTuple(pixel_format); + //GLuint target_tex = texture.handle; // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in // surface - OGLTexture unscaled_tex; + VKTexture unscaled_tex; if (res_scale != 1) { x0 = 0; y0 = 0; - if (is_custom) { - unscaled_tex = owner.AllocateSurfaceTexture( - GetFormatTuple(PixelFormat::RGBA8), custom_tex_info.width, custom_tex_info.height); - } else { - unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight()); - } - target_tex = unscaled_tex.handle; + VKTexture::Info info = { + .width = rect.GetWidth(), + .height = rect.GetHeight(), + .format = fb_format_tuples[static_cast(pixel_format)], + .type = vk::ImageType::e2D, + .view_type = vk::ImageViewType::e2D + }; + + unscaled_tex.Create(info); } - OpenGLState cur_state = OpenGLState::GetCurState(); + //OpenGLState cur_state = OpenGLState::GetCurState(); - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = target_tex; - cur_state.Apply(); + //GLuint old_tex = cur_state.texture_units[0].texture_2d; + //cur_state.texture_units[0].texture_2d = target_tex; + //cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - if (is_custom) { - if (res_scale == 1) { - texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8), - custom_tex_info.width, custom_tex_info.height); - cur_state.texture_units[0].texture_2d = texture.handle; - cur_state.Apply(); - } - // always going to be using rgba8 - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(custom_tex_info.width)); + //ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + 
//glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - glActiveTexture(GL_TEXTURE0); - glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, custom_tex_info.width, custom_tex_info.height, - GL_RGBA, GL_UNSIGNED_BYTE, custom_tex_info.tex.data()); - } else { - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - - glActiveTexture(GL_TEXTURE0); + //glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - if (Settings::values.dump_textures && !is_custom) - DumpTexture(target_tex, tex_hash); + //if (Settings::values.dump_textures && !is_custom) + // DumpTexture(target_tex, tex_hash); cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); @@ -671,8 +513,7 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect, GLuint read_fb_ } MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { +void CachedSurface::DownloadGPUTexture(const Common::Rectangle& rect) { if (type == SurfaceType::Fill) { return; } @@ -870,8 +711,6 @@ RasterizerCacheVulkan::RasterizerCacheVulkan() { texture_filterer = std::make_unique(Settings::values.texture_filter_name, resolution_scale_factor); format_reinterpreter = std::make_unique(); - if (GLES) - texture_downloader_es = std::make_unique(false); read_framebuffer.Create(); draw_framebuffer.Create(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h index 2c5f030e9..5451e6b97 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.h @@ -200,10 +200,6 @@ struct CachedSurface : SurfaceParams, std::enable_shared_from_this rect); void DownloadGPUTexture(const 
Common::Rectangle& rect); @@ -342,9 +338,6 @@ private: SurfaceMap dirty_regions; SurfaceSet remove_surfaces; - VKFramebuffer read_framebuffer; - VKFramebuffer draw_framebuffer; - u16 resolution_scale_factor; std::unordered_map texture_cube_cache; diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.cpp b/src/video_core/renderer_vulkan/vk_resource_cache.cpp index c5cea0de2..068f9b94e 100644 --- a/src/video_core/renderer_vulkan/vk_resource_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_cache.cpp @@ -122,4 +122,379 @@ vk::RenderPass VKResourceCache::GetRenderPass(vk::Format color_format, vk::Forma renderpass_cache.emplace(key, std::move(renderpass)); return handle; } + +Pipeline::Pipeline() { Clear(); } + +void Pipeline::Clear() +{ + m_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + m_ci.pNext = nullptr; + m_ci.flags = 0; + m_ci.pSetLayouts = nullptr; + m_ci.setLayoutCount = 0; + m_ci.pPushConstantRanges = nullptr; + m_ci.pushConstantRangeCount = 0; +} + +void Pipeline::Build() { + VkPipelineLayout layout; + VkResult res = vkCreatePipelineLayout(device, &m_ci, nullptr, &layout); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreatePipelineLayout() failed: "); + return VK_NULL_HANDLE; + } + + Clear(); + return layout; +} + +void Pipeline::AddDescriptorSet(VkDescriptorSetLayout layout) +{ + pxAssert(m_ci.setLayoutCount < MAX_SETS); + + m_sets[m_ci.setLayoutCount] = layout; + + m_ci.setLayoutCount++; + m_ci.pSetLayouts = m_sets.data(); +} + +void Pipeline::AddPushConstants(VkShaderStageFlags stages, u32 offset, u32 size) +{ + pxAssert(m_ci.pushConstantRangeCount < MAX_PUSH_CONSTANTS); + + VkPushConstantRange& r = m_push_constants[m_ci.pushConstantRangeCount]; + r.stageFlags = stages; + r.offset = offset; + r.size = size; + + m_ci.pushConstantRangeCount++; + m_ci.pPushConstantRanges = m_push_constants.data(); +} + +GraphicsPipelineBuilder::GraphicsPipelineBuilder() { Clear(); } + +void GraphicsPipelineBuilder::Clear() +{ + m_ci = 
{}; + m_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + + m_shader_stages = {}; + + m_vertex_input_state = {}; + m_vertex_input_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + m_ci.pVertexInputState = &m_vertex_input_state; + m_vertex_attributes = {}; + m_vertex_buffers = {}; + + m_input_assembly = {}; + m_input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + + m_rasterization_state = {}; + m_rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + m_rasterization_state.lineWidth = 1.0f; + m_depth_state = {}; + m_depth_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + m_blend_state = {}; + m_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + m_blend_attachments = {}; + + m_viewport_state = {}; + m_viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + m_viewport = {}; + m_scissor = {}; + + m_dynamic_state = {}; + m_dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + m_dynamic_state_values = {}; + + m_multisample_state = {}; + m_multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + + m_provoking_vertex = {}; + m_provoking_vertex.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT; + + // set defaults + SetNoCullRasterizationState(); + SetNoDepthTestState(); + SetNoBlendingState(); + SetPrimitiveTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST); + + // have to be specified even if dynamic + SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); + SetScissorRect(0, 0, 1, 1); + SetMultisamples(VK_SAMPLE_COUNT_1_BIT); +} + +VkPipeline GraphicsPipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear /* = true */) +{ + VkPipeline pipeline; + VkResult res = vkCreateGraphicsPipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline); + if (res != VK_SUCCESS) + { + 
LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines() failed: "); + return VK_NULL_HANDLE; + } + + if (clear) + Clear(); + + return pipeline; +} + +void GraphicsPipelineBuilder::SetShaderStage( + VkShaderStageFlagBits stage, VkShaderModule module, const char* entry_point) +{ + pxAssert(m_ci.stageCount < MAX_SHADER_STAGES); + + u32 index = 0; + for (; index < m_ci.stageCount; index++) + { + if (m_shader_stages[index].stage == stage) + break; + } + if (index == m_ci.stageCount) + { + m_ci.stageCount++; + m_ci.pStages = m_shader_stages.data(); + } + + VkPipelineShaderStageCreateInfo& s = m_shader_stages[index]; + s.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + s.stage = stage; + s.module = module; + s.pName = entry_point; +} + +void GraphicsPipelineBuilder::AddVertexBuffer( + u32 binding, u32 stride, VkVertexInputRate input_rate /*= VK_VERTEX_INPUT_RATE_VERTEX*/) +{ + pxAssert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); + + VkVertexInputBindingDescription& b = m_vertex_buffers[m_vertex_input_state.vertexBindingDescriptionCount]; + b.binding = binding; + b.stride = stride; + b.inputRate = input_rate; + + m_vertex_input_state.vertexBindingDescriptionCount++; + m_vertex_input_state.pVertexBindingDescriptions = m_vertex_buffers.data(); + m_ci.pVertexInputState = &m_vertex_input_state; +} + +void GraphicsPipelineBuilder::AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset) +{ + pxAssert(m_vertex_input_state.vertexAttributeDescriptionCount < MAX_VERTEX_BUFFERS); + + VkVertexInputAttributeDescription& a = + m_vertex_attributes[m_vertex_input_state.vertexAttributeDescriptionCount]; + a.location = location; + a.binding = binding; + a.format = format; + a.offset = offset; + + m_vertex_input_state.vertexAttributeDescriptionCount++; + m_vertex_input_state.pVertexAttributeDescriptions = m_vertex_attributes.data(); + m_ci.pVertexInputState = &m_vertex_input_state; +} + +void 
GraphicsPipelineBuilder::SetPrimitiveTopology( + VkPrimitiveTopology topology, bool enable_primitive_restart /*= false*/) +{ + m_input_assembly.topology = topology; + m_input_assembly.primitiveRestartEnable = enable_primitive_restart; + + m_ci.pInputAssemblyState = &m_input_assembly; +} + +void GraphicsPipelineBuilder::SetRasterizationState( + VkPolygonMode polygon_mode, VkCullModeFlags cull_mode, VkFrontFace front_face) +{ + m_rasterization_state.polygonMode = polygon_mode; + m_rasterization_state.cullMode = cull_mode; + m_rasterization_state.frontFace = front_face; + + m_ci.pRasterizationState = &m_rasterization_state; +} + +void GraphicsPipelineBuilder::SetLineWidth(float width) { m_rasterization_state.lineWidth = width; } + +void GraphicsPipelineBuilder::SetMultisamples(u32 multisamples, bool per_sample_shading) +{ + m_multisample_state.rasterizationSamples = static_cast(multisamples); + m_multisample_state.sampleShadingEnable = per_sample_shading; + m_multisample_state.minSampleShading = (multisamples > 1) ? 
1.0f : 0.0f; +} + +void GraphicsPipelineBuilder::SetNoCullRasterizationState() +{ + SetRasterizationState(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, VK_FRONT_FACE_CLOCKWISE); +} + +void GraphicsPipelineBuilder::SetDepthState(bool depth_test, bool depth_write, VkCompareOp compare_op) +{ + m_depth_state.depthTestEnable = depth_test; + m_depth_state.depthWriteEnable = depth_write; + m_depth_state.depthCompareOp = compare_op; + + m_ci.pDepthStencilState = &m_depth_state; +} + +void GraphicsPipelineBuilder::SetStencilState( + bool stencil_test, const VkStencilOpState& front, const VkStencilOpState& back) +{ + m_depth_state.stencilTestEnable = stencil_test; + m_depth_state.front = front; + m_depth_state.back = back; +} + +void GraphicsPipelineBuilder::SetNoStencilState() +{ + m_depth_state.stencilTestEnable = VK_FALSE; + m_depth_state.front = {}; + m_depth_state.back = {}; +} + +void GraphicsPipelineBuilder::SetNoDepthTestState() { SetDepthState(false, false, VK_COMPARE_OP_ALWAYS); } + +void GraphicsPipelineBuilder::SetBlendConstants(float r, float g, float b, float a) +{ + m_blend_state.blendConstants[0] = r; + m_blend_state.blendConstants[1] = g; + m_blend_state.blendConstants[2] = b; + m_blend_state.blendConstants[3] = a; + m_ci.pColorBlendState = &m_blend_state; +} + +void GraphicsPipelineBuilder::AddBlendAttachment(bool blend_enable, VkBlendFactor src_factor, + VkBlendFactor dst_factor, VkBlendOp op, VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, + VkBlendOp alpha_op, + VkColorComponentFlags + write_mask /* = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT */) +{ + pxAssert(m_blend_state.attachmentCount < MAX_ATTACHMENTS); + + VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[m_blend_state.attachmentCount]; + bs.blendEnable = blend_enable; + bs.srcColorBlendFactor = src_factor; + bs.dstColorBlendFactor = dst_factor; + bs.colorBlendOp = op; + bs.srcAlphaBlendFactor = 
alpha_src_factor; + bs.dstAlphaBlendFactor = alpha_dst_factor; + bs.alphaBlendOp = alpha_op; + bs.colorWriteMask = write_mask; + + m_blend_state.attachmentCount++; + m_blend_state.pAttachments = m_blend_attachments.data(); + m_ci.pColorBlendState = &m_blend_state; +} + +void GraphicsPipelineBuilder::SetBlendAttachment(u32 attachment, bool blend_enable, VkBlendFactor src_factor, + VkBlendFactor dst_factor, VkBlendOp op, VkBlendFactor alpha_src_factor, VkBlendFactor alpha_dst_factor, + VkBlendOp alpha_op, + VkColorComponentFlags + write_mask /*= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT*/) +{ + pxAssert(attachment < MAX_ATTACHMENTS); + + VkPipelineColorBlendAttachmentState& bs = m_blend_attachments[attachment]; + bs.blendEnable = blend_enable; + bs.srcColorBlendFactor = src_factor; + bs.dstColorBlendFactor = dst_factor; + bs.colorBlendOp = op; + bs.srcAlphaBlendFactor = alpha_src_factor; + bs.dstAlphaBlendFactor = alpha_dst_factor; + bs.alphaBlendOp = alpha_op; + bs.colorWriteMask = write_mask; + + if (attachment >= m_blend_state.attachmentCount) + { + m_blend_state.attachmentCount = attachment + 1u; + m_blend_state.pAttachments = m_blend_attachments.data(); + m_ci.pColorBlendState = &m_blend_state; + } +} + +void GraphicsPipelineBuilder::AddBlendFlags(u32 flags) +{ + m_blend_state.flags |= flags; +} + +void GraphicsPipelineBuilder::ClearBlendAttachments() +{ + m_blend_attachments = {}; + m_blend_state.attachmentCount = 0; +} + +void GraphicsPipelineBuilder::SetNoBlendingState() +{ + ClearBlendAttachments(); + SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT); +} + +void GraphicsPipelineBuilder::AddDynamicState(VkDynamicState state) +{ + pxAssert(m_dynamic_state.dynamicStateCount < 
MAX_DYNAMIC_STATE); + + m_dynamic_state_values[m_dynamic_state.dynamicStateCount] = state; + m_dynamic_state.dynamicStateCount++; + m_dynamic_state.pDynamicStates = m_dynamic_state_values.data(); + m_ci.pDynamicState = &m_dynamic_state; +} + +void GraphicsPipelineBuilder::SetDynamicViewportAndScissorState() +{ + AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); + AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); +} + +void GraphicsPipelineBuilder::SetViewport( + float x, float y, float width, float height, float min_depth, float max_depth) +{ + m_viewport.x = x; + m_viewport.y = y; + m_viewport.width = width; + m_viewport.height = height; + m_viewport.minDepth = min_depth; + m_viewport.maxDepth = max_depth; + + m_viewport_state.pViewports = &m_viewport; + m_viewport_state.viewportCount = 1u; + m_ci.pViewportState = &m_viewport_state; +} + +void GraphicsPipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) +{ + m_scissor.offset.x = x; + m_scissor.offset.y = y; + m_scissor.extent.width = width; + m_scissor.extent.height = height; + + m_viewport_state.pScissors = &m_scissor; + m_viewport_state.scissorCount = 1u; + m_ci.pViewportState = &m_viewport_state; +} + +void GraphicsPipelineBuilder::SetMultisamples(VkSampleCountFlagBits samples) +{ + m_multisample_state.rasterizationSamples = samples; + m_ci.pMultisampleState = &m_multisample_state; +} + +void GraphicsPipelineBuilder::SetPipelineLayout(VkPipelineLayout layout) { m_ci.layout = layout; } + +void GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpass) +{ + m_ci.renderPass = render_pass; + m_ci.subpass = subpass; +} + +void GraphicsPipelineBuilder::SetProvokingVertex(VkProvokingVertexModeEXT mode) +{ + Util::AddPointerToChain(&m_rasterization_state, &m_provoking_vertex); + + m_provoking_vertex.provokingVertexMode = mode; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_cache.h b/src/video_core/renderer_vulkan/vk_resource_cache.h index 330d731ec..09d52bdce 
100644 --- a/src/video_core/renderer_vulkan/vk_resource_cache.h +++ b/src/video_core/renderer_vulkan/vk_resource_cache.h @@ -53,6 +53,56 @@ private: std::string pipeline_cache_filename; }; +constexpr u32 MAX_DYNAMIC_STATES = 8; +constexpr u32 MAX_ATTACHMENTS = 2; +constexpr u32 MAX_VERTEX_BUFFERS = 3; + +class Pipeline { +public: + Pipeline(); + ~Pipeline() = default; + + void Build(); + + void SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module); + + void AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate); + void AddVertexAttribute(u32 location, u32 binding, VkFormat format, u32 offset); + + void SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart = false); + void SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode, + vk::FrontFace front_face); + + void SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op); + void SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back); + void SetNoDepthTestState(); + void SetNoStencilState(); + + void AddDynamicState(vk::DynamicState state); + void SetMultisamples(VkSampleCountFlagBits samples); + +private: + vk::GraphicsPipelineCreateInfo pipeline_info; + std::array shader_stages; + + vk::PipelineVertexInputStateCreateInfo vertex_input_state; + vk::PipelineInputAssemblyStateCreateInfo input_assembly; + vk::PipelineRasterizationStateCreateInfo rasterization_state; + vk::PipelineDepthStencilStateCreateInfo depth_state; + + // Blending + vk::PipelineColorBlendStateCreateInfo blend_state; + std::array blend_attachments; + std::array dynamic_state_values; + + VkPipelineViewportStateCreateInfo m_viewport_state; + VkViewport m_viewport; + VkRect2D m_scissor; + + VkPipelineDynamicStateCreateInfo m_dynamic_state; + vk::PipelineMultisampleStateCreateInfo multisample_info; +}; + extern std::unique_ptr g_vk_res_cache; } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_state.cpp b/src/video_core/renderer_vulkan/vk_state.cpp index 8a0cde6c0..6729af9b3 100644 --- a/src/video_core/renderer_vulkan/vk_state.cpp +++ b/src/video_core/renderer_vulkan/vk_state.cpp @@ -2,25 +2,37 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include "video_core/renderer_vulkan/vk_state.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" #include "video_core/renderer_vulkan/vk_resource_cache.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" namespace Vulkan { -std::unique_ptr g_vk_state; - -// Define bitwise operators for DirtyState enum -DirtyState operator |=(DirtyState lhs, DirtyState rhs) { - return static_cast ( +// Define bitwise operators for DirtyFlags enum +DirtyFlags operator |=(DirtyFlags lhs, DirtyFlags rhs) { + return static_cast ( static_cast(lhs) | static_cast(rhs) ); } -bool operator &(DirtyState lhs, DirtyState rhs) { - return static_cast(lhs) & - static_cast(rhs); +bool operator &(DirtyFlags lhs, DirtyFlags rhs) { + return static_cast(lhs) & + static_cast(rhs); +} + +bool operator >(BindingID lhs, BindingID rhs) { + return static_cast(lhs) & + static_cast(rhs); +} + +bool operator <(BindingID lhs, BindingID rhs) { + return static_cast(lhs) & + static_cast(rhs); } void VulkanState::Create() { @@ -34,7 +46,7 @@ void VulkanState::Create() { }; dummy_texture.Create(info); - dummy_texture.TransitionLayout(vk::ImageLayout::eShaderReadOnlyOptimal, g_vk_task_scheduler->GetCommandBuffer()); + dummy_texture.Transition(vk::ImageLayout::eShaderReadOnlyOptimal); // Create descriptor pool // TODO: Choose sizes more wisely @@ -53,7 +65,22 @@ void VulkanState::Create() { vk::DescriptorSetAllocateInfo alloc_info(desc_pool.get(), layouts); descriptor_sets = device.allocateDescriptorSetsUnique(alloc_info); - dirty_flags |= DirtyState::All; + // Create texture sampler + auto props 
= g_vk_instace->GetPhysicalDevice().getProperties(); + vk::SamplerCreateInfo sampler_info + ( + {}, vk::Filter::eNearest, vk::Filter::eNearest, + vk::SamplerMipmapMode::eNearest, vk::SamplerAddressMode::eClampToEdge, + vk::SamplerAddressMode::eClampToEdge, vk::SamplerAddressMode::eClampToEdge, + {}, true, props.limits.maxSamplerAnisotropy, + false, vk::CompareOp::eAlways, {}, {}, + vk::BorderColor::eIntOpaqueBlack, false + ); + + sampler = g_vk_instace->GetDevice().createSamplerUnique(sampler_info); + dirty_flags |= DirtyFlags::All; + + CompileTrivialShader(); } void VulkanState::SetVertexBuffer(VKBuffer* buffer, vk::DeviceSize offset) { @@ -63,56 +90,66 @@ void VulkanState::SetVertexBuffer(VKBuffer* buffer, vk::DeviceSize offset) { vertex_buffer = buffer; vertex_offset = offset; - dirty_flags |= DirtyState::VertexBuffer; + dirty_flags |= DirtyFlags::VertexBuffer; } -void VulkanState::SetUniformBuffer(UniformID id, VKBuffer* buffer, u32 offset, u32 size) { +void VulkanState::SetUniformBuffer(BindingID id, VKBuffer* buffer, u32 offset, u32 size) { + assert(id < BindingID::Tex0); u32 index = static_cast(id); - auto& binding = bindings.ubo[index]; - if (binding.buffer != buffer->GetBuffer() || binding.range != size) - { - binding.buffer = buffer->GetBuffer(); - binding.range = size; - dirty_flags |= DirtyState::Uniform; - bindings.ubo_update[index] = true; + + auto& binding = bindings[index]; + auto old_buffer = std::get(binding.resource); + if (old_buffer != buffer) { + binding.resource = buffer; + dirty_flags |= DirtyFlags::Uniform; + binding.dirty = true; } } -void VulkanState::SetTexture(TextureID id, VKTexture* texture) { +void VulkanState::SetTexture(BindingID id, VKTexture* image) { + assert(id > BindingID::PicaUniform && id < BindingID::LutLF); u32 index = static_cast(id); - if (bindings.texture[index].imageView == texture->GetView()) { - return; - } - bindings.texture[index].imageView = texture->GetView(); - bindings.texture[index].imageLayout = 
vk::ImageLayout::eShaderReadOnlyOptimal; - dirty_flags |= DirtyState::Texture; - bindings.texture_update[index] = true; + auto& binding = bindings[index]; + auto old_image = std::get(binding.resource); + if (old_image != image) { + binding.resource = image; + dirty_flags |= DirtyFlags::Texture; + binding.dirty = true; + } } -void VulkanState::SetTexelBuffer(TexelBufferID id, VKBuffer* buffer) { +void VulkanState::SetTexelBuffer(BindingID id, VKBuffer* buffer, vk::Format view_format) { + assert(id > BindingID::TexCube); u32 index = static_cast(id); - if (bindings.lut[index].buffer == buffer->GetBuffer()) { - return; - } - bindings.lut[index].buffer = buffer->GetBuffer(); - dirty_flags |= DirtyState::TexelBuffer; - bindings.lut_update[index] = true; + auto& binding = bindings[index]; + auto old_buffer = std::get(binding.resource); + if (old_buffer != buffer) { + auto& device = g_vk_instace->GetDevice(); + + binding.resource = buffer; + binding.buffer_view = device.createBufferViewUnique({{}, buffer->GetBuffer(), view_format}); + dirty_flags |= DirtyFlags::TexelBuffer; + binding.dirty = true; + } } void VulkanState::UnbindTexture(VKTexture* image) { - // Search the texture bindings for the view - // and replace it with the dummy texture if found - for (auto& it : bindings.texture) { - if (it.imageView == image->GetView()) { - it.imageView = dummy_texture.GetView(); - it.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + for (auto i = u32(BindingID::Tex0); i <= u32(BindingID::TexCube); i++) { + auto current_image = std::get(bindings[i].resource); + if (current_image == image) { + UnbindTexture(i); } } } -void VulkanState::SetAttachments(VKTexture* color, VKTexture* depth_stencil) { +void VulkanState::UnbindTexture(u32 index) { + bindings[index].resource = &dummy_texture; + dirty_flags |= DirtyFlags::Texture; +} + +void VulkanState::PushRenderTargets(VKTexture* color, VKTexture* depth_stencil) { color_attachment = color; depth_attachment = depth_stencil; } @@ 
-127,14 +164,8 @@ void VulkanState::BeginRendering() { } // Make sure attachments are in optimal layout - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - if (color_attachment->GetLayout() != vk::ImageLayout::eColorAttachmentOptimal) { - color_attachment->TransitionLayout(vk::ImageLayout::eColorAttachmentOptimal, command_buffer); - } - - if (depth_attachment->GetLayout() != vk::ImageLayout::eDepthStencilAttachmentOptimal) { - depth_attachment->TransitionLayout(vk::ImageLayout::eDepthStencilAttachmentOptimal, command_buffer); - } + color_attachment->Transition(vk::ImageLayout::eColorAttachmentOptimal); + depth_attachment->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal); // Begin rendering vk::RenderingAttachmentInfoKHR color_info(color_attachment->GetView(), color_attachment->GetLayout()); @@ -148,6 +179,7 @@ void VulkanState::BeginRendering() { &depth_stencil_info ); + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); command_buffer.beginRendering(render_info); rendering = true; } @@ -163,21 +195,104 @@ void VulkanState::EndRendering() { } void VulkanState::SetViewport(vk::Viewport new_viewport) { - if (new_viewport == viewport) { - return; + if (new_viewport != viewport) { + viewport = new_viewport; + dirty_flags |= DirtyFlags::Viewport; } - - viewport = new_viewport; - dirty_flags |= DirtyState::Viewport; } void VulkanState::SetScissor(vk::Rect2D new_scissor) { - if (new_scissor == scissor) { - return; + if (new_scissor != scissor) { + scissor = new_scissor; + dirty_flags |= DirtyFlags::Scissor; } +} - scissor = new_scissor; - dirty_flags |= DirtyState::Scissor; +void VulkanState::SetCullMode(vk::CullModeFlags flags) { + if (cull_mode != flags) { + cull_mode = flags; + dirty_flags |= DirtyFlags::CullMode; + } +} + +void VulkanState::SetFrontFace(vk::FrontFace face) { + if (front_face != face) { + front_face = face; + dirty_flags |= DirtyFlags::FrontFace; + } +} + +void VulkanState::SetLogicOp(vk::LogicOp new_logic_op) { + 
if (logic_op != new_logic_op) { + logic_op = new_logic_op; + dirty_flags |= DirtyFlags::LogicOp; + } +} + +void VulkanState::SetColorMask(bool red, bool green, bool blue, bool alpha) { + auto mask = static_cast(red | (green << 1) | (blue << 2) | (alpha << 3)); + static_state.blend.setColorWriteMask(mask); +} + +void VulkanState::SetBlendEnable(bool enable) { + static_state.blend.setBlendEnable(enable); +} + +void VulkanState::SetBlendCostants(float red, float green, float blue, float alpha) { + std::array color = { red, green, blue, alpha }; + if (color != blend_constants) { + blend_constants = color; + dirty_flags = DirtyFlags::BlendConsts; + } +} + +void VulkanState::SetStencilWrite(u32 mask) { + if (mask != stencil_write_mask) { + stencil_write_mask = mask; + dirty_flags |= DirtyFlags::StencilMask; + } +} + +void VulkanState::SetStencilInput(u32 mask) { + if (mask != stencil_input_mask) { + stencil_input_mask = mask; + dirty_flags |= DirtyFlags::StencilMask; + } +} + +void VulkanState::SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, + vk::CompareOp compare, u32 ref) { + stencil_enabled = enable; + stencil_ref = ref; + fail_op = fail; + pass_op = pass; + depth_fail_op = depth_fail; + compare_op = compare; + dirty_flags |= DirtyFlags::Stencil; +} + +void VulkanState::SetDepthWrite(bool enable) { + if (enable != depth_writes) { + depth_writes = enable; + dirty_flags |= DirtyFlags::DepthWrite; + } +} + +void VulkanState::SetDepthTest(bool enable, vk::CompareOp compare) { + depth_enabled = enable; + test_func = compare; + dirty_flags |= DirtyFlags::DepthTest; +} + +void VulkanState::SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, + vk::BlendFactor dst_color, vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha) { + auto& blend = static_state.blend; + blend.setColorBlendOp(rgb_op); + blend.setAlphaBlendOp(alpha_op); + blend.setSrcColorBlendFactor(src_color); + 
blend.setDstColorBlendFactor(dst_color); + blend.setSrcAlphaBlendFactor(src_alpha); + blend.setDstAlphaBlendFactor(dst_alpha); } void VulkanState::Apply() { @@ -189,56 +304,65 @@ void VulkanState::Apply() { // Re-apply dynamic parts of the pipeline auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - if (dirty_flags & DirtyState::VertexBuffer) { + if (dirty_flags & DirtyFlags::VertexBuffer) { command_buffer.bindVertexBuffers(0, vertex_buffer->GetBuffer(), vertex_offset); } - if (dirty_flags & DirtyState::IndexBuffer) { + if (dirty_flags & DirtyFlags::IndexBuffer) { command_buffer.bindIndexBuffer(index_buffer->GetBuffer(), index_offset, vk::IndexType::eUint16); } - if (dirty_flags & DirtyState::Viewport) { + if (dirty_flags & DirtyFlags::Viewport) { command_buffer.setViewport(0, viewport); } - if (dirty_flags & DirtyState::Scissor) { + if (dirty_flags & DirtyFlags::Scissor) { command_buffer.setScissor(0, scissor); } - dirty_flags = DirtyState::None; + dirty_flags = DirtyFlags::None; } void VulkanState::UpdateDescriptorSet() { std::vector writes; + std::vector buffer_infos; + std::vector image_infos; + auto& device = g_vk_instace->GetDevice(); // Check if any resource has been updated - if (dirty_flags & DirtyState::Uniform) { + if (dirty_flags & DirtyFlags::Uniform) { for (int i = 0; i < 2; i++) { - if (bindings.ubo_update[i]) { + if (bindings[i].dirty) { + auto buffer = std::get(bindings[i].resource); + buffer_infos.emplace_back(buffer->GetBuffer(), 0, VK_WHOLE_SIZE); writes.emplace_back(descriptor_sets[i].get(), i, 0, 1, vk::DescriptorType::eUniformBuffer, - nullptr, &bindings.ubo[i]); - bindings.ubo_update[i] = false; + nullptr, &buffer_infos.back(), nullptr); + bindings[i].dirty = false; } } } - if (dirty_flags & DirtyState::Texture) { - for (int i = 0; i < 4; i++) { - if (bindings.texture_update[i]) { + if (dirty_flags & DirtyFlags::Texture) { + for (int i = 2; i < 6; i++) { + if (bindings[i].dirty) { + auto texture = std::get(bindings[i].resource); 
+ image_infos.emplace_back(sampler.get(), texture->GetView(), vk::ImageLayout::eShaderReadOnlyOptimal); writes.emplace_back(descriptor_sets[i].get(), i, 0, 1, vk::DescriptorType::eCombinedImageSampler, - nullptr, &bindings.texture[i]); - bindings.texture_update[i] = false; + &image_infos.back()); + bindings[i].dirty = false; } } } - if (dirty_flags & DirtyState::TexelBuffer) { - for (int i = 0; i < 3; i++) { - if (bindings.lut_update[i]) { + if (dirty_flags & DirtyFlags::TexelBuffer) { + for (int i = 6; i < 9; i++) { + if (bindings[i].dirty) { + auto buffer = std::get(bindings[i].resource); + buffer_infos.emplace_back(buffer->GetBuffer(), 0, VK_WHOLE_SIZE); writes.emplace_back(descriptor_sets[i].get(), i, 0, 1, vk::DescriptorType::eStorageTexelBuffer, - nullptr, &bindings.lut[i]); - bindings.lut_update[i] = false; + nullptr, &buffer_infos.back(), &bindings[i].buffer_view.get()); + bindings[i].dirty = false; } } } @@ -248,4 +372,24 @@ void VulkanState::UpdateDescriptorSet() { } } +void VulkanState::CompileTrivialShader() { + auto source = OpenGL::GenerateTrivialVertexShader(true); + + shaderc::Compiler compiler; + shaderc::CompileOptions options; + options.SetOptimizationLevel(shaderc_optimization_level_performance); + options.SetTargetEnvironment(shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2); + + auto shader_module = compiler.CompileGlslToSpv(source.code, shaderc_glsl_vertex_shader, "vertex shader", options); + if (shader_module.GetCompilationStatus() != shaderc_compilation_status_success) { + LOG_CRITICAL(Render_Vulkan, shader_module.GetErrorMessage().c_str()); + } + + auto shader_code = std::vector{ shader_module.cbegin(), shader_module.cend() }; + vk::ShaderModuleCreateInfo shader_info { {}, shader_code }; + + auto& device = g_vk_instace->GetDevice(); + trivial_vertex_shader = device.createShaderModuleUnique(shader_info); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h 
index 72dd23454..870ee6630 100644 --- a/src/video_core/renderer_vulkan/vk_state.h +++ b/src/video_core/renderer_vulkan/vk_state.h @@ -5,47 +5,51 @@ #pragma once #include +#include #include "video_core/renderer_vulkan/vk_texture.h" namespace Vulkan { -enum class DirtyState { - None, - Framebuffer, - Pipeline, - Texture, - Sampler, - TexelBuffer, - ImageTexture, - Depth, - Stencil, - LogicOp, - Viewport, - Scissor, - CullMode, - VertexBuffer, - IndexBuffer, - Uniform, - All +enum class DirtyFlags { + None = 0, + Framebuffer = 1, + Pipeline = 1 << 1, + Texture = 1 << 2, + Sampler = 1 << 3, + TexelBuffer = 1 << 4, + ImageTexture = 1 << 5, + DepthTest = 1 << 6, + Stencil = 1 << 7, + LogicOp = 1 << 8, + Viewport = 1 << 9, + Scissor = 1 << 10, + CullMode = 1 << 11, + VertexBuffer = 1 << 12, + IndexBuffer = 1 << 13, + Uniform = 1 << 14, + FrontFace = 1 << 15, + BlendConsts = 1 << 16, + ColorMask = 1 << 17, + StencilMask = 1 << 18, + DepthWrite = 1 << 19, + All = (1 << 20) - 1 }; -enum class UniformID { - Pica = 0, - Shader = 1 +enum class BindingID { + VertexUniform = 0, + PicaUniform = 1, + Tex0 = 2, + Tex1 = 3, + Tex2 = 4, + TexCube = 5, + LutLF = 6, + LutRG = 7, + LutRGBA = 8 }; -enum class TextureID { - Tex0 = 0, - Tex1 = 1, - Tex2 = 2, - TexCube = 3 -}; - -enum class TexelBufferID { - LF = 0, - RG = 1, - RGBA = 2 -}; +BindingID operator + (BindingID lhs, u32 rhs) { + return static_cast(static_cast(lhs) + rhs); +} /// Tracks global Vulkan state class VulkanState { @@ -56,80 +60,98 @@ public: /// Initialize object to its initial state void Create(); + /// Query state + bool DepthTestEnabled() const { return depth_enabled && depth_writes; } + bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; } + /// Configure drawing state void SetVertexBuffer(VKBuffer* buffer, vk::DeviceSize offset); void SetViewport(vk::Viewport viewport); void SetScissor(vk::Rect2D scissor); + void SetCullMode(vk::CullModeFlags flags); + void SetFrontFace(vk::FrontFace face); 
+ void SetLogicOp(vk::LogicOp logic_op); + void SetStencilWrite(u32 mask); + void SetStencilInput(u32 mask); + void SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, + vk::CompareOp compare, u32 ref); + void SetDepthWrite(bool enable); + void SetDepthTest(bool enable, vk::CompareOp compare); + void SetColorMask(bool red, bool green, bool blue, bool alpha); + void SetBlendEnable(bool enable); + void SetBlendCostants(float red, float green, float blue, float alpha); + void SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, vk::BlendFactor dst_color, + vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha); /// Rendering - void SetAttachments(VKTexture* color, VKTexture* depth_stencil); + void PushRenderTargets(VKTexture* color, VKTexture* depth_stencil); + void PopRenderTargets(); void SetRenderArea(vk::Rect2D render_area); void BeginRendering(); void EndRendering(); /// Configure shader resources - void SetUniformBuffer(UniformID id, VKBuffer* buffer, u32 offset, u32 size); - void SetTexture(TextureID id, VKTexture* texture); - void SetTexelBuffer(TexelBufferID id, VKBuffer* buffer); + void SetUniformBuffer(BindingID id, VKBuffer* buffer, u32 offset, u32 size); + void SetTexture(BindingID id, VKTexture* texture); + void SetTexelBuffer(BindingID id, VKBuffer* buffer, vk::Format view_format); void UnbindTexture(VKTexture* image); + void UnbindTexture(u32 index); /// Apply all dirty state to the current Vulkan command buffer - void UpdateDescriptorSet(); void Apply(); private: - // Stage which should be applied - DirtyState dirty_flags; + void UpdateDescriptorSet(); + void GetPipeline(); + void CompileTrivialShader(); + +private: + struct Binding { + bool dirty{}; + std::variant resource{}; + vk::UniqueBufferView buffer_view{}; + }; + + struct Attachment { + VKTexture* color{}; + VKTexture* depth_stencil{}; + }; + + DirtyFlags dirty_flags; bool rendering = false; + VKTexture dummy_texture; + 
vk::UniqueSampler sampler; - // Input assembly - VKBuffer* vertex_buffer = nullptr, * index_buffer = nullptr; - vk::DeviceSize vertex_offset = 0, index_offset = 0; - - // Shader bindings. These describe which resources - // we have bound to the pipeline and at which - // bind points. When the state is applied the - // descriptor sets are updated with the new - // resources - struct - { - std::array ubo; - std::array ubo_update; - std::array texture; - std::array texture_update; - std::array lut; - std::array lut_update; - } bindings = {}; - std::vector descriptor_sets = {}; + VKBuffer* vertex_buffer{}, * index_buffer{}; + vk::DeviceSize vertex_offset{}, index_offset{}; + std::array bindings{}; + std::vector descriptor_sets{}; vk::UniqueDescriptorPool desc_pool; - // Rasterization - vk::Viewport viewport = { 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f }; - vk::CullModeFlags cull_mode = vk::CullModeFlagBits::eNone; - vk::Rect2D scissor = { {0, 0}, {1, 1} }; - VKTexture dummy_texture; + vk::Viewport viewport{ 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f }; + vk::CullModeFlags cull_mode{}; + vk::FrontFace front_face{}; + vk::Rect2D scissor{}; + vk::LogicOp logic_op{}; + std::array blend_constants{}; - // Render attachments - VKTexture* color_attachment = nullptr, * depth_attachment = nullptr; - vk::Rect2D render_area = {}; - vk::ColorComponentFlags color_mask; - - // Depth - bool depth_enabled; + VKTexture* color_attachment{}, * depth_attachment{}; + vk::Rect2D render_area{}; + bool depth_enabled, depth_writes; vk::CompareOp test_func; - // Stencil - bool stencil_enabled; - vk::StencilFaceFlags face_mask; - vk::StencilOp fail_op, pass_op; - vk::StencilOp depth_fail_op; + u32 stencil_write_mask{}, stencil_input_mask{}, stencil_ref{}; + bool stencil_enabled{}, stencil_writes{}; + vk::StencilOp fail_op, pass_op, depth_fail_op; vk::CompareOp compare_op; - vk::LogicOp logic_op; - std::array clip_distance; + struct { + vk::PipelineColorBlendAttachmentState blend; + 
vk::PipelineDepthStencilStateCreateInfo depth_stencil; + } static_state; + // Pipeline cache + vk::UniqueShaderModule trivial_vertex_shader; }; -extern std::unique_ptr g_vk_state; - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 0d06d8977..9b1ea51e3 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -10,6 +10,7 @@ #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_resource_cache.h" namespace Vulkan { @@ -202,14 +203,24 @@ void VKSwapChain::SetupImages() { ); // Wrap swapchain images with VKTexture - swapchain_images[i].image.Adopt(images[i], color_attachment_view); + swapchain_images[i].image = images[i]; + swapchain_images[i].image_view = device.createImageViewUnique(color_attachment_view); // Create framebuffer for each swapchain image - VKFramebuffer::Info fb_info = { - .color = &swapchain_images[i].image - }; + vk::FramebufferCreateInfo framebuffer_info + ( + {}, + g_vk_res_cache->GetRenderPass(details.format.format, + vk::Format::eUndefined, + vk::SampleCountFlagBits::e1, + vk::AttachmentLoadOp::eLoad), + {}, + details.extent.width, + details.extent.height, + 1 + ); - swapchain_images[i].framebuffer.Create(fb_info); + swapchain_images[i].framebuffer = device.createFramebufferUnique(framebuffer_info); } } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index c9ffb23cc..127491315 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -11,8 +11,9 @@ namespace Vulkan { struct SwapChainImage { - VKTexture image; - VKFramebuffer framebuffer; + vk::Image image; + vk::UniqueImageView image_view; + vk::UniqueFramebuffer framebuffer; }; struct SwapChainDetails { @@ -49,8 
+50,8 @@ public: vk::SwapchainKHR GetSwapChain() const { return swapchain.get(); } /// Retrieve current texture and framebuffer - VKTexture& GetCurrentImage() { return swapchain_images[image_index].image; } - VKFramebuffer& GetCurrentFramebuffer() { return swapchain_images[image_index].framebuffer; } + vk::Image GetCurrentImage() { return swapchain_images[image_index].image; } + vk::Framebuffer GetCurrentFramebuffer() { return swapchain_images[image_index].framebuffer.get(); } private: void PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height); diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index 1863bea2f..af0e5f5cc 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -9,17 +9,24 @@ namespace Vulkan { -VKTaskScheduler::VKTaskScheduler(VKSwapChain* swapchain) : swapchain(swapchain) { - -} +VKTaskScheduler::VKTaskScheduler(VKSwapChain* swapchain) : + swapchain(swapchain) {} VKTaskScheduler::~VKTaskScheduler() { - // Sync the GPU before exiting SyncToGPU(); } -vk::CommandBuffer VKTaskScheduler::GetCommandBuffer() { - return tasks[current_task].command_buffer; +std::tuple VKTaskScheduler::RequestStaging(u32 size) { + auto& task = tasks[current_task]; + if (size > STAGING_BUFFER_SIZE - task.current_offset) { + return std::make_tuple(nullptr, 0); + } + + u8* ptr = task.staging.GetHostPointer() + task.current_offset; + auto result = std::make_tuple(ptr, task.current_offset); + + task.current_offset += size; + return result; } bool VKTaskScheduler::Create() { @@ -36,7 +43,13 @@ bool VKTaskScheduler::Create() { timeline = device.createSemaphoreUnique(semaphore_info); - // Initialize task structures + VKBuffer::Info staging_info = { + .size = STAGING_BUFFER_SIZE, + .properties = vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent, + .usage = 
vk::BufferUsageFlagBits::eTransferSrc + }; + for (auto& task : tasks) { // Create command buffers vk::CommandBufferAllocateInfo buffer_info @@ -47,6 +60,9 @@ bool VKTaskScheduler::Create() { ); task.command_buffer = device.allocateCommandBuffers(buffer_info)[0]; + + // Create staging buffer + task.staging.Create(staging_info); } // Create present semaphore @@ -79,8 +95,8 @@ void VKTaskScheduler::SyncToGPU(u64 task_index) { // Delete all resources that can be freed now for (auto& task : tasks) { if (task.task_id > old_gpu_tick && task.task_id <= new_gpu_tick) { - for (auto& deleter : task.cleanups) { - deleter(); + for (auto& func : task.cleanups) { + func(); } } } @@ -139,17 +155,13 @@ void VKTaskScheduler::BeginTask() { // Wait for the GPU to finish with all resources for this task. SyncToGPU(next_task_index); - // Reset command pools to beginning since we can re-use the memory now device.resetCommandPool(command_pool.get()); - - vk::CommandBufferBeginInfo begin_info(vk::CommandBufferUsageFlagBits::eOneTimeSubmit); - - // Enable commands to be recorded to the command buffer again. - task.command_buffer.begin(begin_info); + task.command_buffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); // Reset upload command buffer state current_task = next_task_index; task.task_id = current_task_id++; + task.current_offset = 0; } std::unique_ptr g_vk_task_scheduler; diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index 67ff2da2d..6b0805b60 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -17,19 +17,20 @@ #include "common/blocking_loop.h" #include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_buffer.h" namespace Vulkan { -/// Number of tasks that can be submitted concurrently. 
This allows the host -/// to start recording the next frame while the GPU is working on the -/// current one. Larger values can be used with caution, as they can cause -/// frame latency if the CPU is too far ahead of the GPU constexpr u32 CONCURRENT_TASK_COUNT = 2; +constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024; class VKSwapChain; -/// Wrapper class around command buffer execution. -/// Handles synchronization and submission of command buffers +/// Wrapper class around command buffer execution. Handles an arbitrary +/// number of tasks that can be submitted concurrently. This allows the host +/// to start recording the next frame while the GPU is working on the +/// current one. Larger values can be used with caution, as they can cause +/// frame latency if the CPU is too far ahead of the GPU class VKTaskScheduler { public: explicit VKTaskScheduler(VKSwapChain* swapchain); @@ -39,7 +40,9 @@ public: bool Create(); /// Retrieve either of the current frame's command buffers - vk::CommandBuffer GetCommandBuffer(); + vk::CommandBuffer GetCommandBuffer() const { return tasks[current_task].command_buffer; } + VKBuffer& GetStaging() { return tasks[current_task].staging; } + std::tuple RequestStaging(u32 size); /// Returns the task id that the CPU is recording u64 GetCPUTick() const { return current_task_id; } @@ -62,9 +65,11 @@ private: private: struct Task { + u64 task_id{}; std::vector> cleanups; vk::CommandBuffer command_buffer; - u64 task_id = 0; + VKBuffer staging; + u32 current_offset{}; }; vk::UniqueSemaphore timeline; diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp index e98a6d9e5..69f5d7baf 100644 --- a/src/video_core/renderer_vulkan/vk_texture.cpp +++ b/src/video_core/renderer_vulkan/vk_texture.cpp @@ -12,32 +12,28 @@ namespace Vulkan { VKTexture::~VKTexture() { - // Make sure to unbind the texture before destroying it - g_vk_state->UnbindTexture(this); + if (texture) { + // Make sure to unbind 
the texture before destroying it + g_vk_state->UnbindTexture(this); - auto deleter = [this]() { - auto& device = g_vk_instace->GetDevice(); + auto deleter = [this]() { + auto& device = g_vk_instace->GetDevice(); + device.destroyImage(texture); + device.destroyImageView(view); + device.freeMemory(memory); + }; - if (texture) { - if (cleanup_image) { - device.destroyImage(texture); - } - - device.destroyImageView(texture_view); - device.freeMemory(texture_memory); - } - }; - - // Schedule deletion of the texture after it's no longer used - // by the GPU - g_vk_task_scheduler->Schedule(deleter); + // Schedule deletion of the texture after it's no longer used + // by the GPU + g_vk_task_scheduler->Schedule(deleter); + } } -void VKTexture::Create(const Info& info, bool make_staging) { +void VKTexture::Create(const VKTexture::Info& create_info) { auto& device = g_vk_instace->GetDevice(); - texture_info = info; + info = create_info; - switch (texture_info.format) + switch (info.format) { case vk::Format::eR8G8B8A8Uint: case vk::Format::eR8G8B8A8Srgb: @@ -48,26 +44,24 @@ void VKTexture::Create(const Info& info, bool make_staging) { channels = 3; break; default: - LOG_CRITICAL(Render_Vulkan, "Unknown texture format {}", texture_info.format); + LOG_CRITICAL(Render_Vulkan, "Unknown texture format {}", info.format); } // Create the texture - u32 image_size = texture_info.width * texture_info.height * channels; - vk::ImageCreateFlags flags; + image_size = info.width * info.height * channels; + + vk::ImageCreateFlags flags{}; if (info.view_type == vk::ImageViewType::eCube) { flags = vk::ImageCreateFlagBits::eCubeCompatible; } - vk::ImageCreateInfo image_info - ( - flags, - info.type, - texture_info.format, - { texture_info.width, texture_info.height, 1 }, info.mipmap_levels, info.array_layers, + vk::ImageCreateInfo image_info { + flags, info.type, info.format, + { info.width, info.height, 1 }, info.levels, info.layers, static_cast(info.multisamples), vk::ImageTiling::eOptimal, 
vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled - ); + }; texture = device.createImage(image_info); @@ -76,31 +70,23 @@ void VKTexture::Create(const Info& info, bool make_staging) { auto memory_index = VKBuffer::FindMemoryType(requirements.memoryTypeBits, vk::MemoryPropertyFlagBits::eDeviceLocal); vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index); - texture_memory = device.allocateMemory(alloc_info); - device.bindImageMemory(texture, texture_memory, 0); + memory = device.allocateMemory(alloc_info); + device.bindImageMemory(texture, memory, 0); // Create texture view - vk::ImageViewCreateInfo view_info({}, texture, info.view_type, texture_info.format, {}, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1)); - texture_view = device.createImageView(view_info); + vk::ImageViewCreateInfo view_info { + {}, texture, info.view_type, info.format, {}, + {info.aspect, 0, info.levels, 0, info.levels} + }; - // Create staging buffer - if (make_staging) { - staging.Create(image_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, - vk::BufferUsageFlagBits::eTransferSrc); + view = device.createImageView(view_info); +} + +void VKTexture::Transition(vk::ImageLayout new_layout) { + if (new_layout == layout) { + return; } -} -void VKTexture::Adopt(vk::Image image, vk::ImageViewCreateInfo view_info) { - // Prevent image cleanup at object destruction - cleanup_image = false; - texture = image; - - // Create image view - texture_view = g_vk_instace->GetDevice().createImageView(view_info); -} - -void VKTexture::TransitionLayout(vk::ImageLayout new_layout, vk::CommandBuffer command_buffer) { struct LayoutInfo { vk::ImageLayout layout; vk::AccessFlags access; @@ -110,7 +96,7 @@ void VKTexture::TransitionLayout(vk::ImageLayout new_layout, vk::CommandBuffer c // Get optimal transition settings for every image layout. 
Settings taken from Dolphin auto layout_info = [&](vk::ImageLayout layout) -> LayoutInfo { LayoutInfo info = { .layout = layout }; - switch (texture_layout) { + switch (layout) { case vk::ImageLayout::eUndefined: // Layout undefined therefore contents undefined, and we don't care what happens to it. info.access = vk::AccessFlagBits::eNone; @@ -154,7 +140,7 @@ void VKTexture::TransitionLayout(vk::ImageLayout new_layout, vk::CommandBuffer c break; default: - LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", texture_layout); + LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); break; } @@ -162,112 +148,47 @@ void VKTexture::TransitionLayout(vk::ImageLayout new_layout, vk::CommandBuffer c }; // Submit pipeline barrier - LayoutInfo source = layout_info(texture_layout), dst = layout_info(new_layout); - vk::ImageMemoryBarrier barrier - ( + LayoutInfo source = layout_info(layout), dst = layout_info(new_layout); + vk::ImageMemoryBarrier barrier { source.access, dst.access, source.layout, dst.layout, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, texture, - vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1) - ); - - std::array barriers = { barrier }; + vk::ImageSubresourceRange(info.aspect, 0, 1, 0, 1) + }; + std::array barriers{ barrier }; + auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); command_buffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barriers); - - vk::SubmitInfo submit_info({}, {}, {}, 1, &command_buffer); - - // Update texture layout - texture_layout = new_layout; + layout = new_layout; } -void VKTexture::CopyPixels(std::span new_pixels) { - if (!staging.GetHostPointer()) { +void VKTexture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span pixels) { + u8* staging = g_vk_task_scheduler->RequestStaging(pixels.size()); + if (!staging) { LOG_ERROR(Render_Vulkan, "Cannot copy pixels without staging buffer!"); } auto 
command_buffer = g_vk_task_scheduler->GetCommandBuffer(); // Copy pixels to staging buffer - std::memcpy(staging.GetHostPointer(), - new_pixels.data(), new_pixels.size() * channels); + std::memcpy(staging, pixels.data(), pixels.size()); - vk::BufferImageCopy region(0, 0, 0, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), 0, - { texture_info.width, texture_info.height, 1 }); - std::array regions = { region }; + vk::BufferImageCopy copy_region { + 0, row_length, region.extent.height, + {info.aspect, level, layer, 1}, + { region.offset.x, region.offset.y, 0 }, + { region.extent.width, region.extent.height, 1 } + }; // Transition image to transfer format - TransitionLayout(vk::ImageLayout::eTransferDstOptimal, command_buffer); - - command_buffer.copyBufferToImage(staging.GetBuffer(), texture, vk::ImageLayout::eTransferDstOptimal, regions); + Transition(vk::ImageLayout::eTransferDstOptimal); + command_buffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(), + texture, vk::ImageLayout::eTransferDstOptimal, + copy_region); // Prepare for shader reads - TransitionLayout(vk::ImageLayout::eShaderReadOnlyOptimal, command_buffer); -} - -void VKTexture::BlitTo(Common::Rectangle srect, VKTexture* dest, - Common::Rectangle drect, SurfaceParams::SurfaceType type) { - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - - // Ensure textures are of the same dimention - assert(texture_info.width == dest->texture_info.width && - texture_info.height == dest->texture_info.height); - - vk::ImageAspectFlags image_aspect; - switch (type) { - case SurfaceParams::SurfaceType::Color: - case SurfaceParams::SurfaceType::Texture: - image_aspect = vk::ImageAspectFlagBits::eColor; - break; - case SurfaceParams::SurfaceType::Depth: - image_aspect = vk::ImageAspectFlagBits::eDepth; - break; - case SurfaceParams::SurfaceType::DepthStencil: - image_aspect = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - break; - default: - 
LOG_CRITICAL(Render_Vulkan, "Unhandled image blit aspect\n"); - UNREACHABLE(); - } - - // Define the region to blit - vk::ImageSubresourceLayers layers(image_aspect, 0, 0, 1); - - std::array src_offsets = { vk::Offset3D(srect.left, srect.bottom, 1), vk::Offset3D(srect.right, srect.top, 1) }; - std::array dst_offsets = { vk::Offset3D(drect.left, drect.bottom, 1), vk::Offset3D(drect.right, drect.top, 1) }; - std::array regions = {{{layers, src_offsets, layers, dst_offsets}}}; - - // Transition image layouts - TransitionLayout(vk::ImageLayout::eTransferSrcOptimal, command_buffer); - dest->TransitionLayout(vk::ImageLayout::eTransferDstOptimal, command_buffer); - - // Perform blit operation - command_buffer.blitImage(texture, vk::ImageLayout::eTransferSrcOptimal, dest->GetHandle(), - vk::ImageLayout::eTransferDstOptimal, regions, vk::Filter::eNearest); -} - -void VKTexture::Fill(Common::Rectangle region, vk::ImageAspectFlags aspect, - vk::ClearValue value) { - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - TransitionLayout(vk::ImageLayout::eTransferDstOptimal, command_buffer); - - // End any ongoing rendering operations - g_vk_state->EndRendering(); - - // Set fill area - g_vk_state->SetAttachments(this, nullptr); - - // Begin clear render - g_vk_state->BeginRendering(); - - vk::Offset2D offset(region.left, region.bottom); - vk::Rect2D rect(offset, { region.GetWidth(), region.GetHeight() }); - vk::ClearAttachment clear_info(aspect, 0, value); - vk::ClearRect clear_rect(rect, 0, 1); - command_buffer.clearAttachments(clear_info, clear_rect); - - TransitionLayout(vk::ImageLayout::eShaderReadOnlyOptimal, command_buffer); + Transition(vk::ImageLayout::eShaderReadOnlyOptimal); } } diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h index 1fa1169a1..72b4db670 100644 --- a/src/video_core/renderer_vulkan/vk_texture.h +++ b/src/video_core/renderer_vulkan/vk_texture.h @@ -15,15 +15,13 @@ namespace Vulkan { struct 
SamplerInfo { - std::array wrapping = { vk::SamplerAddressMode::eClampToEdge }; - vk::Filter min_filter = vk::Filter::eLinear; - vk::Filter mag_filter = vk::Filter::eLinear; - vk::SamplerMipmapMode mipmap_mode = vk::SamplerMipmapMode::eLinear; + std::array wrapping{}; + vk::Filter min_filter{}, mag_filter{}; + vk::SamplerMipmapMode mipmap_mode{}; }; /// Vulkan texture object class VKTexture final : public NonCopyable { - friend class VKFramebuffer; public: /// Information for the creation of the target texture struct Info { @@ -31,57 +29,40 @@ public: vk::Format format; vk::ImageType type; vk::ImageViewType view_type; - u32 mipmap_levels = 1; - u32 array_layers = 1; + vk::ImageUsageFlags usage; + vk::ImageAspectFlags aspect; u32 multisamples = 1; + u32 levels = 1, layers = 1; SamplerInfo sampler_info = {}; }; VKTexture() = default; - VKTexture(VKTexture&&) = default; ~VKTexture(); /// Create a new Vulkan texture object - void Create(const Info& info, bool staging = false); + void Create(const VKTexture::Info& info); - /// Create a non-owning texture object, usefull for image object - /// from the swapchain that are managed by another object - void Adopt(vk::Image image, vk::ImageViewCreateInfo view_info); + /// Query objects + bool IsValid() const { return texture; } + vk::Image GetHandle() const { return texture; } + vk::ImageView GetView() const { return view; } + vk::Format GetFormat() const { return info.format; } + vk::ImageLayout GetLayout() const { return layout; } + u32 GetSamples() const { return info.multisamples; } /// Copies CPU side pixel data to the GPU texture buffer - void CopyPixels(std::span pixels); - - /// Get Vulkan objects - vk::Image GetHandle() const { return texture; } - vk::ImageView GetView() const { return texture_view; } - vk::Format GetFormat() const { return texture_info.format; } - vk::Rect2D GetRect() const { return vk::Rect2D({}, { texture_info.width, texture_info.height }); } - vk::ImageLayout GetLayout() const { return 
texture_layout; } - u32 GetSamples() const { return texture_info.multisamples; } - bool IsValid() { return texture; } + void Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span pixels); /// Used to transition the image to an optimal layout during transfers - void TransitionLayout(vk::ImageLayout new_layout, vk::CommandBuffer command_buffer); - - /// Fill the texture with the values provided - void Fill(Common::Rectangle region, vk::ImageAspectFlags aspect, - vk::ClearValue value); - - /// Copy current texture to another with optionally performing format convesions - void BlitTo(Common::Rectangle srect, VKTexture* dest, - Common::Rectangle drect, SurfaceParams::SurfaceType type); + void Transition(vk::ImageLayout new_layout); private: - bool cleanup_image = true; - Info texture_info; - vk::ImageLayout texture_layout = vk::ImageLayout::eUndefined; + VKTexture::Info info{}; + vk::ImageLayout layout{}; vk::Image texture; - vk::ImageView texture_view; - vk::DeviceMemory texture_memory; - u32 channels; - - // TODO: Make a global staging buffer - VKBuffer staging; + vk::ImageView view; + vk::DeviceMemory memory; + u32 channels{}, image_size{}; }; } // namespace Vulkan