diff --git a/src/common/math_util.h b/src/common/math_util.h index 449d68668..9256fb3d5 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -39,6 +39,9 @@ struct Rectangle { return Rectangle{left, top, static_cast(left + GetWidth() * s), static_cast(top + GetHeight() * s)}; } + [[nodiscard]] Rectangle operator *(const T num) const { + return Rectangle{left * num, top * num, right * num, bottom * num}; + } auto operator <=> (const Rectangle& other) const = default; }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 67866f1ef..6aca6d9e5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,9 +27,13 @@ add_library(video_core STATIC common/buffer.h common/framebuffer.h common/pica_types.h + common/rasterizer_cache.cpp + common/rasterizer_cache.h common/shader_gen.cpp common/shader_gen.h common/shader.h + common/surface_params.cpp + common/surface_params.h common/texture.h common/pipeline.h renderer_opengl/frame_dumper_opengl.cpp @@ -92,6 +96,8 @@ add_library(video_core STATIC renderer_vulkan/vk_format_reinterpreter.h renderer_vulkan/vk_format_util.cpp renderer_vulkan/vk_format_util.h + renderer_vulkan/vk_framebuffer.cpp + renderer_vulkan/vk_framebuffer.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h renderer_vulkan/vk_pipeline.cpp @@ -106,8 +112,6 @@ add_library(video_core STATIC renderer_vulkan/vk_shader_gen.h renderer_vulkan/vk_shader.cpp renderer_vulkan/vk_shader.h - renderer_vulkan/vk_state.cpp - renderer_vulkan/vk_state.h renderer_vulkan/vk_surface_params.cpp renderer_vulkan/vk_surface_params.h renderer_vulkan/vk_swapchain.cpp diff --git a/src/video_core/common/backend.h b/src/video_core/common/backend.h index 19f5a8f32..fa58689c9 100644 --- a/src/video_core/common/backend.h +++ b/src/video_core/common/backend.h @@ -46,7 +46,7 @@ public: // Start an indexed draw operation virtual void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, - 
BufferHandle vertex_buffer, BufferHandle index_buffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, AttribType index_type, u32 base_index, u32 num_indices, u32 base_vertex) = 0; // Executes a compute shader diff --git a/src/video_core/common/buffer.h b/src/video_core/common/buffer.h index c17b1662a..c64c62edf 100644 --- a/src/video_core/common/buffer.h +++ b/src/video_core/common/buffer.h @@ -5,6 +5,7 @@ #pragma once #include +#include "common/assert.h" #include "common/hash.h" #include "common/intrusive_ptr.h" @@ -45,38 +46,59 @@ static_assert(std::is_standard_layout_v, "BufferInfo is not a standa class BufferBase : public IntrusivePtrEnabled { public: BufferBase() = default; - BufferBase(const BufferInfo& info) : info(info) {} + BufferBase(const BufferInfo& info) : info(info), bind_range(info.capacity) {} virtual ~BufferBase() = default; - /// Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes - /// and the optional alignment requirement. - /// The actual used size must be specified on unmapping the chunk. - virtual std::span Map(u32 size, u32 alignment = 0) {}; + // Disable copy constructor + BufferBase(const BufferBase&) = delete; + BufferBase& operator=(const BufferBase&) = delete; - /// Flushes write to buffer memory - virtual void Commit(u32 size = 0) {}; + // Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes + // and the optional alignment requirement. + // The actual used size must be specified on unmapping the chunk. 
+ virtual std::span Map(u32 size, u32 alignment = 0) = 0; - /// Returns the size of the buffer in bytes + // Flushes writes to buffer memory + virtual void Commit(u32 size = 0) = 0; + + // Sets the range of the buffer that will be used when bound (offset + range may equal capacity) + void SetBindRange(u32 offset, u32 range) { + ASSERT(offset < info.capacity && range <= info.capacity - offset); + bind_offset = offset; + bind_range = range; + } + + // Returns the bind offset + u32 GetBindOffset() const { + return bind_offset; + } + + // Returns the number of bytes after bind_offset that will be bound + u32 GetBindRange() const { + return bind_range; + } + + // Returns the size of the buffer in bytes u32 GetCapacity() const { return info.capacity; } - /// Returns the usage of the buffer + // Returns the usage of the buffer BufferUsage GetUsage() const { return info.usage; } - /// Returns the starting offset of the currently mapped buffer slice + // Returns the starting offset of the currently mapped buffer slice u64 GetCurrentOffset() const { return buffer_offset; } - /// Returns whether the buffer was invalidated by the most recent Map call + // Returns whether the buffer was invalidated by the most recent Map call bool IsInvalid() const { return invalid; } - /// Invalidates the buffer + // Invalidates the buffer void Invalidate() { buffer_offset = 0; invalid = true; @@ -84,6 +106,8 @@ public: protected: BufferInfo info{}; + u32 bind_offset = 0; + u32 bind_range = 0; // Set to capacity by the BufferInfo constructor u32 buffer_offset = 0; bool invalid = false; }; diff --git a/src/video_core/common/framebuffer.h b/src/video_core/common/framebuffer.h index 86a8e7219..e6fa212b0 100644 --- a/src/video_core/common/framebuffer.h +++ b/src/video_core/common/framebuffer.h @@ -4,6 +4,7 @@ #pragma once +#include "common/vector_math.h" #include "video_core/common/texture.h" namespace VideoCore { @@ -22,12 +23,10 @@ struct FramebufferInfo { TextureHandle color; TextureHandle depth_stencil; MSAASamples samples = MSAASamples::x1; - Rect2D 
draw_rect{}; - /// Hashes the framebuffer object and returns a unique identifier + // Hashes the framebuffer object and returns a unique identifier const u64 Hash() const { - // The only member IntrusivePtr has is a pointer to the - // handle so it's fine hash it + // IntrusivePtr only has a pointer member so it's fine to hash it return Common::ComputeStructHash64(*this); } }; @@ -40,6 +39,13 @@ public: FramebufferBase(const FramebufferInfo& info) : info(info) {} virtual ~FramebufferBase() = default; + // Disable copying (copy construction and copy assignment) + FramebufferBase(const FramebufferBase&) = delete; + FramebufferBase& operator=(const FramebufferBase&) = delete; + + // Clears the attachments bound to the framebuffer + virtual void DoClear(Common::Rectangle rect, Common::Vec4f color, float depth, u8 stencil) = 0; + /// Returns an immutable reference to the color attachment const TextureHandle& GetColorAttachment() const { return info.color; @@ -55,11 +61,6 @@ public: return info.samples; } - /// Returns the rendering area - Rect2D GetDrawRectangle() const { - return info.draw_rect; - } - protected: FramebufferInfo info; }; diff --git a/src/video_core/common/pipeline.h b/src/video_core/common/pipeline.h index c159f7f98..439251f4d 100644 --- a/src/video_core/common/pipeline.h +++ b/src/video_core/common/pipeline.h @@ -86,7 +86,8 @@ union BlendState { enum class AttribType : u8 { Float = 0, Int = 1, - Short = 2 + Short = 2, + Byte = 3 }; union VertexAttribute { @@ -113,6 +114,8 @@ struct PipelineInfo { BlendState blending{}; DepthStencilState depth_stencil{}; RasterizationState rasterization{}; + TextureFormat color_attachment = TextureFormat::RGBA8; + TextureFormat depth_attachment = TextureFormat::D24S8; const u64 Hash() const { return Common::ComputeStructHash64(*this); @@ -139,6 +142,10 @@ public: // Binds the sampler in the specified slot virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) = 0; + PipelineType GetType() const { + return type; + } + /// Sets the primitive 
topology void SetTopology(Pica::TriangleTopology topology) { info.rasterization.topology.Assign(topology); diff --git a/src/video_core/common/rasterizer_cache.cpp b/src/video_core/common/rasterizer_cache.cpp new file mode 100644 index 000000000..fbf6a569f --- /dev/null +++ b/src/video_core/common/rasterizer_cache.cpp @@ -0,0 +1,1682 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/texture.h" +#include "common/vector_math.h" +#include "core/core.h" +#include "core/memory.h" +#include "core/settings.h" +#include "video_core/pica_state.h" +#include "video_core/common/backend.h" +#include "video_core/common/rasterizer_cache.h" +#include "video_core/utils.h" +#include "video_core/video_core.h" + +namespace VideoCore { + +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + +static constexpr std::array fb_texture_formats = { + TextureFormat::RGBA8, + TextureFormat::RGB8, + TextureFormat::RGB5A1, + TextureFormat::RGB565, + TextureFormat::RGBA4, +}; + +static constexpr std::array depth_texture_formats = { + TextureFormat::D16, + TextureFormat::Undefined, + TextureFormat::D24, + TextureFormat::D24S8 +}; + +TextureFormat GetTextureFormat(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + if (type == SurfaceType::Color) { + ASSERT(static_cast(pixel_format) < fb_texture_formats.size()); + return fb_texture_formats[static_cast(pixel_format)]; + } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + std::size_t tuple_idx = static_cast(pixel_format) - 14; + ASSERT(tuple_idx < depth_texture_formats.size()); + return depth_texture_formats[tuple_idx]; + } + + LOG_ERROR(Render_Vulkan, "Unknown pixel format {}!", 
pixel_format); + return TextureFormat::Undefined; +} + +template +static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +template +static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetBytesPerPixel(format); + for (u32 y = 0; y < 8; ++y) { + for (u32 x = 0; x < 8; ++x) { + u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; + u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; + if constexpr (morton_to_gl) { + if constexpr (format == PixelFormat::D24S8) { + gl_ptr[0] = tile_ptr[3]; + std::memcpy(gl_ptr + 1, tile_ptr, 3); + } else if (format == PixelFormat::RGBA8) { + // because GLES does not have ABGR format + // so we will do byteswapping here + gl_ptr[0] = tile_ptr[3]; + gl_ptr[1] = tile_ptr[2]; + gl_ptr[2] = tile_ptr[1]; + gl_ptr[3] = tile_ptr[0]; + } else if (format == PixelFormat::RGB8) { + gl_ptr[0] = tile_ptr[2]; + gl_ptr[1] = tile_ptr[1]; + gl_ptr[2] = tile_ptr[0]; + } else { + std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); + } + } else { + if constexpr (format == PixelFormat::D24S8) { + std::memcpy(tile_ptr, gl_ptr + 1, 3); + tile_ptr[3] = gl_ptr[0]; + } else if (format == PixelFormat::RGBA8) { + // because GLES does not have ABGR format + // so we will do byteswapping here + tile_ptr[0] = gl_ptr[3]; + tile_ptr[1] = gl_ptr[2]; + tile_ptr[2] = gl_ptr[1]; + tile_ptr[3] = gl_ptr[0]; + } else if (format == PixelFormat::RGB8) { + tile_ptr[0] = gl_ptr[2]; + tile_ptr[1] = gl_ptr[1]; + tile_ptr[2] = gl_ptr[0]; + } else { + std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); + } + } + } + } +} + +template +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + 
constexpr u32 tile_size = bytes_per_pixel * 64; + + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetBytesPerPixel(format); + static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); + gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; + + const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + + ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); + + const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; + u32 x = (begin_pixel_index % (stride * 8)) / 8; + u32 y = (begin_pixel_index / (stride * 8)) * 8; + + gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; + + auto glbuf_next_tile = [&] { + x = (x + 8) % stride; + gl_buffer += 8 * gl_bytes_per_pixel; + if (!x) { + y += 8; + gl_buffer -= stride * 9 * gl_bytes_per_pixel; + } + }; + + u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start); + + if (start < aligned_start && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], + std::min(aligned_start, end) - start); + + tile_buffer += aligned_start - start; + glbuf_next_tile(); + } + + const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + PAddr current_paddr = aligned_start; + while (tile_buffer < buffer_end) { + // Pokemon Super Mystery Dungeon will try to use textures that go beyond + // the end address of VRAM. 
Stop reading if reaches invalid address + if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) || + !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) { + LOG_ERROR(Render_Vulkan, "Out of bound texture"); + break; + } + + MortonCopyTile(stride, tile_buffer, gl_buffer); + tile_buffer += tile_size; + current_paddr += tile_size; + glbuf_next_tile(); + } + + if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); + } +} + +static constexpr std::array morton_to_gl_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +static constexpr std::array gl_to_morton_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +// Allocate an uninitialized texture of appropriate size and format for the surface +TextureHandle RasterizerCache::AllocateSurfaceTexture(const TextureInfo& info) { + auto recycled_tex = host_texture_recycler.find(info); + if (recycled_tex != host_texture_recycler.end()) { + TextureHandle texture = std::move(recycled_tex->second); + host_texture_recycler.erase(recycled_tex); + return texture; + } + + return backend->CreateTexture(info); +} + +void RasterizerCache::RecycleTexture(TextureHandle&& handle) { + host_texture_recycler.emplace(handle->GetInfo(), std::move(handle)); +} + +bool RasterizerCache::FillSurface(const Surface& surface, const u8* fill_data, Common::Rectangle fill_rect) { + const bool color_surface = 
surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture; + const bool depth_surface = surface->type == SurfaceType::Depth || surface->type == SurfaceType::DepthStencil; + const FramebufferInfo framebuffer_info = { + .color = color_surface ? surface->texture : TextureHandle{}, + .depth_stencil = depth_surface ? surface->texture : TextureHandle{} + }; + + // Some backends (Vulkan) provide texture clear functions but in general + // it's still more efficient to use framebuffers for fills to take advantage + // of the dedicated clear engine on the GPU + FramebufferHandle framebuffer; + if (auto iter = framebuffer_cache.find(framebuffer_info); iter != framebuffer_cache.end()) { + framebuffer = iter->second; + } else { + framebuffer = backend->CreateFramebuffer(framebuffer_info); + framebuffer_cache.emplace(framebuffer_info, framebuffer); + } + + surface->InvalidateAllWatcher(); + + if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.format = static_cast(surface->pixel_format); + Common::Vec4f color_values = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info) / 255.f; + + framebuffer->DoClear(fill_rect, color_values, 0.0f, 0); + } else if (surface->type == SurfaceType::Depth) { + u32 depth_32bit = 0; + float depth_float; + + if (surface->pixel_format == SurfaceParams::PixelFormat::D16) { + std::memcpy(&depth_32bit, fill_data, 2); + depth_float = depth_32bit / 65535.0f; // 2^16 - 1 + } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) { + std::memcpy(&depth_32bit, fill_data, 3); + depth_float = depth_32bit / 16777215.0f; // 2^24 - 1 + } else { + LOG_ERROR(Render_Vulkan, "Unknown format for depth surface!"); + UNREACHABLE(); + } + + framebuffer->DoClear(fill_rect, {}, depth_float, 0); + } else if (surface->type == SurfaceType::DepthStencil) { + u32 value_32bit; + std::memcpy(&value_32bit, fill_data, sizeof(u32)); + + float depth_float = (value_32bit 
& 0xFFFFFF) / 16777215.0f; // 2^24 - 1 + u8 stencil_int = (value_32bit >> 24); + + framebuffer->DoClear(fill_rect, {}, depth_float, stencil_int); + } + return true; +} + +CachedSurface::~CachedSurface() { + if (texture.IsValid()) { + owner.RecycleTexture(std::move(texture)); + } +} + +bool CachedSurface::CanFill(const SurfaceParams& dest_surface, + SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +MICROPROFILE_DEFINE(CopySurface, "RasterizerCache", "CopySurface", MP_RGB(128, 192, 64)); +void RasterizerCache::CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + 
MICROPROFILE_SCOPE(CopySurface); + + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval && src_surface != dst_surface); + + // This is only called when CanCopy is true, no need to run checks here + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (int i : {0, 1, 2, 3}) { + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + } + + FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params)); + return; + } + + if (src_surface->CanSubRect(subrect_params)) { + src_surface->texture->BlitTo(dst_surface->texture, src_surface->GetScaledSubRect(subrect_params), + dst_surface->GetScaledSubRect(subrect_params)); + return; + } + + UNREACHABLE(); +} + +MICROPROFILE_DEFINE(SurfaceLoad, "RasterizerCache", "Surface Load", MP_RGB(128, 192, 64)); +void CachedSurface::LoadBuffer(PAddr load_start, PAddr load_end) { + ASSERT(type != SurfaceType::Fill); + const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8); + + const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr); + if (texture_src_data == nullptr) + return; + + if (gl_buffer.empty()) { + gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); + } + + // TODO: Should probably be done in ::Memory:: and check for other regions too + if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) + load_end = Memory::VRAM_VADDR_END; + + if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) + load_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(SurfaceLoad); + + ASSERT(load_start >= addr && load_end <= end); + const u32 start_offset = load_start - addr; + + if (!is_tiled) { + 
ASSERT(type == SurfaceType::Color); + if (need_swap) { + // TODO(liushuyu): check if the byteswap here is 100% correct + // cannot fully test this + if (pixel_format == PixelFormat::RGBA8) { + for (std::size_t i = start_offset; i < load_end - addr; i += 4) { + gl_buffer[i] = texture_src_data[i + 3]; + gl_buffer[i + 1] = texture_src_data[i + 2]; + gl_buffer[i + 2] = texture_src_data[i + 1]; + gl_buffer[i + 3] = texture_src_data[i]; + } + } else if (pixel_format == PixelFormat::RGB8) { + for (std::size_t i = start_offset; i < load_end - addr; i += 3) { + gl_buffer[i] = texture_src_data[i + 2]; + gl_buffer[i + 1] = texture_src_data[i + 1]; + gl_buffer[i + 2] = texture_src_data[i]; + } + } + } else { + std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, + load_end - load_start); + } + } else { + if (type == SurfaceType::Texture) { + Pica::Texture::TextureInfo tex_info{}; + tex_info.width = width; + tex_info.height = height; + tex_info.format = static_cast(pixel_format); + tex_info.SetDefaultStride(); + tex_info.physical_address = addr; + + const SurfaceInterval load_interval(load_start, load_end); + const auto rect = GetSubRect(FromInterval(load_interval)); + ASSERT(FromInterval(load_interval).GetInterval() == load_interval); + + for (unsigned y = rect.bottom; y < rect.top; ++y) { + for (unsigned x = rect.left; x < rect.right; ++x) { + auto vec4 = + Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info); + const std::size_t offset = (x + (width * y)) * 4; + std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4); + } + } + } else { + morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], + addr, load_start, load_end); + } + } +} + +MICROPROFILE_DEFINE(SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); +void CachedSurface::FlushBuffer(PAddr flush_start, PAddr flush_end) { + u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); + if (dst_buffer == nullptr) + return; + + 
ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); + + // TODO: Should probably be done in ::Memory:: and check for other regions too + // same as loadglbuffer() + if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) + flush_end = Memory::VRAM_VADDR_END; + + if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) + flush_start = Memory::VRAM_VADDR; + + MICROPROFILE_SCOPE(SurfaceFlush); + + ASSERT(flush_start >= addr && flush_end <= end); + const u32 start_offset = flush_start - addr; + const u32 end_offset = flush_end - addr; + + if (type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % fill_size); + const u32 backup_bytes = start_offset % fill_size; + std::array backup_data; + if (backup_bytes) + std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); + + for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { + std::memcpy(&dst_buffer[offset], &fill_data[0], + std::min(fill_size, end_offset - offset)); + } + + if (backup_bytes) + std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!is_tiled) { + ASSERT(type == SurfaceType::Color); + if (pixel_format == PixelFormat::RGBA8) { + for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { + dst_buffer[i] = gl_buffer[i + 3]; + dst_buffer[i + 1] = gl_buffer[i + 2]; + dst_buffer[i + 2] = gl_buffer[i + 1]; + dst_buffer[i + 3] = gl_buffer[i]; + } + } else if (pixel_format == PixelFormat::RGB8) { + for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { + dst_buffer[i] = gl_buffer[i + 2]; + dst_buffer[i + 1] = gl_buffer[i + 1]; + dst_buffer[i + 2] = gl_buffer[i]; + } + } else { + std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], + flush_end - flush_start); + } + } else { + gl_to_morton_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], + addr, flush_start, flush_end); + } +} + +bool 
CachedSurface::LoadCustomTexture(u64 tex_hash) { + auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); + const auto& image_interface = Core::System::GetInstance().GetImageInterface(); + + if (custom_tex_cache.IsTextureCached(tex_hash)) { + custom_tex_info = custom_tex_cache.LookupTexture(tex_hash); + return true; + } + + if (!custom_tex_cache.CustomTextureExists(tex_hash)) { + return false; + } + + const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash); + if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width, + custom_tex_info.height, path_info.path)) { + LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path); + return false; + } + + const std::bitset<32> width_bits(custom_tex_info.width); + const std::bitset<32> height_bits(custom_tex_info.height); + if (width_bits.count() != 1 || height_bits.count() != 1) { + LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path); + return false; + } + + LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path); + Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height); + custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width, + custom_tex_info.height); + return true; +} + +/*void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) { + // Make sure the texture size is a power of 2 + // If not, the surface is actually a framebuffer + std::bitset<32> width_bits(width); + std::bitset<32> height_bits(height); + if (width_bits.count() != 1 || height_bits.count() != 1) { + LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})", + tex_hash, width, height); + return; + } + + // Dump texture to RGBA8 and encode as PNG + const auto& image_interface = Core::System::GetInstance().GetImageInterface(); + auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache(); + std::string dump_path = + 
fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), + Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id); + if (!FileUtil::CreateFullPath(dump_path)) { + LOG_ERROR(Render, "Unable to create {}", dump_path); + return; + } + + dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format); + if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) { + custom_tex_cache.SetTextureDumped(tex_hash); + + LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path); + std::vector decoded_texture; + decoded_texture.resize(width * height * 4); + OpenGLState state = OpenGLState::GetCurState(); + GLuint old_texture = state.texture_units[0].texture_2d; + state.Apply(); + + // GetTexImageOES is used even if not using OpenGL ES to work around a small issue that + // happens if using custom textures with texture dumping at the same time. + // Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a + // higher quality 256x256 texture. If the 256x256 texture is displayed first and the + // 32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture + // will appear in the corner of the 256x256 texture. If texture dumping is enabled and + // the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL + // texture is still 256x256, Citra crashes because it thinks the texture is only 32x32. + // GetTexImageOES conveniently only dumps the specified region, and works on both + // desktop and ES. 
+ // if the backend isn't OpenGL ES, this won't be initialized yet + if (!owner.texture_downloader_es) + owner.texture_downloader_es = std::make_unique(false); + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, + height, width, &decoded_texture[0]); + state.texture_units[0].texture_2d = old_texture; + state.Apply(); + Common::FlipRGBA8Texture(decoded_texture, width, height); + if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height)) + LOG_ERROR(Render_OpenGL, "Failed to save decoded texture"); + } +}*/ + +MICROPROFILE_DEFINE(TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadTexture(Common::Rectangle rect) { + if (type == SurfaceType::Fill) { + return; + } + + MICROPROFILE_SCOPE(TextureUL); + + ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format)); + + u64 tex_hash = 0; + if (Settings::values.dump_textures || Settings::values.custom_textures) { + tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size()); + } + + if (Settings::values.custom_textures) { + is_custom = LoadCustomTexture(tex_hash); + } + + // Load data from memory to the surface + s32 x0 = static_cast(rect.left); + s32 y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); + + TextureInfo texture_info = { + .type = TextureType::Texture2D, + .view_type = TextureViewType::View2D, + .format = TextureFormat::RGBA8 + }; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in + // surface + TextureHandle target_tex = texture, unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + if (is_custom) { + texture_info.width = custom_tex_info.width; + texture_info.height = custom_tex_info.height; + } else { + texture_info.width = rect.GetWidth(); + texture_info.height = rect.GetHeight(); + texture_info.format = GetTextureFormat(pixel_format); + } + + 
texture_info.UpdateMipLevels(); + target_tex = unscaled_tex = owner.AllocateSurfaceTexture(texture_info); + } + + // Ensure the stride is aligned + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + if (is_custom) { + if (res_scale == 1) { + texture_info.width = custom_tex_info.width; + texture_info.height = custom_tex_info.height; + texture_info.UpdateMipLevels(); + + target_tex = texture = owner.AllocateSurfaceTexture(texture_info); + } + + Rect2D rect{x0, y0, custom_tex_info.width, custom_tex_info.height}; + target_tex->Upload(rect, custom_tex_info.width, custom_tex_info.tex); // 1x staging texture when upscaling + } else { + const u32 update_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format); + auto data = std::span{gl_buffer.data() + buffer_offset, update_size}; + + target_tex->Upload(rect, stride, data); + } + + /*if (Settings::values.dump_textures && !is_custom) + DumpTexture(target_tex, tex_hash);*/ + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + auto from_rect = is_custom ? 
Common::Rectangle{0, custom_tex_info.height, custom_tex_info.width, 0} + : Common::Rectangle{0, rect.GetHeight(), rect.GetWidth(), 0}; + + /*if (!owner.texture_filterer->Filter(unscaled_tex.handle, from_rect, texture.handle, + scaled_rect, type, read_fb_handle, draw_fb_handle)) { + BlitTextures(unscaled_tex.handle, from_rect, texture.handle, scaled_rect, type, + read_fb_handle, draw_fb_handle); + }*/ + + unscaled_tex->BlitTo(texture, from_rect, scaled_rect); + } + + InvalidateAllWatcher(); +} + +MICROPROFILE_DEFINE(TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); +void CachedSurface::DownloadTexture(const Common::Rectangle& rect) { + if (type == SurfaceType::Fill) { + return; + } + + MICROPROFILE_SCOPE(TextureDL); + + if (gl_buffer.empty()) { + gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); + } + + // Ensure the stride is aligned + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + + s32 x0 = static_cast(rect.left); + s32 y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + TextureHandle download_source = texture; + if (res_scale != 1) { + auto scaled_rect = rect * res_scale; + TextureInfo texture_info = { + .width = static_cast(rect.GetWidth()), + .height = static_cast(rect.GetHeight()), + .type = TextureType::Texture2D, + .view_type = TextureViewType::View2D, + .format = GetTextureFormat(pixel_format) + }; + + texture_info.UpdateMipLevels(); + + Common::Rectangle unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; + TextureHandle unscaled_tex = owner.AllocateSurfaceTexture(texture_info); + + texture->BlitTo(unscaled_tex, scaled_rect, unscaled_tex_rect); + download_source = unscaled_tex; + } + + // Download pixel data + const u32 download_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format); + auto data = std::span{gl_buffer.data() + 
buffer_offset, download_size}; + + download_source->Download(rect, stride, data); +} + +enum MatchFlags { + Invalid = 1, // Flag that can be applied to other match types, invalid matches require + // validation before they can be used + Exact = 1 << 1, // Surfaces perfectly match + SubRect = 1 << 2, // Surface encompasses params + Copy = 1 << 3, // Surface we can copy from + Expand = 1 << 4, // Surface that can expand params + TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters +}; + +static constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/// Get the best surface match (and its match type) for the given flags +template +static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + std::optional validate_interval = std::nullopt) { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; + + for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (const auto& surface : pair.second) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + // validity will be checked in GetCopyableInterval + bool is_valid = + find_flags & MatchFlags::Copy + ? 
true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (!(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (!(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + auto copy_interval = + params.FromInterval(*validate_interval).GetCopyableInterval(surface); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + 
return match_surface; +} + +RasterizerCache::RasterizerCache(std::unique_ptr& backend) : backend(backend) { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + /*texture_filterer = std::make_unique(Settings::values.texture_filter_name, + resolution_scale_factor); + format_reinterpreter = std::make_unique(); + if (GLES) + texture_downloader_es = std::make_unique(false);*/ +} + +RasterizerCache::~RasterizerCache() { +#ifndef ANDROID + // This is for switching renderers, which is unsupported on Android, and costly on shutdown + ClearAll(false); +#endif +} + +MICROPROFILE_DEFINE(BlitSurface, "RasterizerCache", "BlitSurface", MP_RGB(128, 192, 64)); +bool RasterizerCache::BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, + const Surface& dst_surface, const Common::Rectangle& dst_rect) { + MICROPROFILE_SCOPE(BlitSurface); + + if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) + return false; + + dst_surface->InvalidateAllWatcher(); + src_surface->texture->BlitTo(dst_surface->texture, src_rect, dst_rect); + return true; +} + +Surface RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); + + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find + // it to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && 
expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } + } + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +SurfaceRect_Tuple RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, Common::Rectangle{}); + } + + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + 
surface = FindMatch(surface_cache, aligned_params, + match_res_scale); + if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + + SurfaceParams new_params = *surface; + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = std::max(aligned_params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); + + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted + remove_surfaces.emplace(surface); + + surface = new_surface; + RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); + } + + return std::make_tuple(surface, surface->GetScaledSubRect(params)); +} + +Surface RasterizerCache::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) { + const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + return GetTextureSurface(info, config.config.lod.max_level); +} + +Surface RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, + u32 max_level) { + if (info.physical_address == 0) { + return nullptr; + } + + SurfaceParams params; 
+ params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format); + params.res_scale = /*texture_filterer->IsNull() ? 1 :*/ resolution_scale_factor; + params.UpdateParams(); + + u32 min_width = info.width >> max_level; + u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, + min_height); + return nullptr; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(Render_OpenGL, + "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return nullptr; + } + + auto surface = GetSurface(params, ScaleMatch::Ignore, true); + if (!surface) + return nullptr; + + // Update mipmap if necessary + if (max_level != 0) { + if (max_level >= 8) { + // since PICA only supports texture size between 8 and 1024, there are at most eight + // possible mipmap levels including the base. 
+ LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + return nullptr; + } + + // Allocate more mipmap level if necessary + if (surface->max_level < max_level) { + if (surface->is_custom /*|| !texture_filterer->IsNull()*/) { + // TODO: proper mipmap support for custom textures + surface->texture->GenerateMipmaps(); + } + + surface->max_level = max_level; + } + + // Blit mipmaps that have been invalidated + SurfaceParams surface_params = *surface; + for (u32 level = 1; level <= max_level; ++level) { + // In PICA all mipmap levels are stored next to each other + surface_params.addr += surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; + surface_params.width /= 2; + surface_params.height /= 2; + surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.UpdateParams(); + + auto& watcher = surface->level_watchers[level - 1]; + if (!watcher || !watcher->Get()) { + auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); + watcher = level_surface ? 
level_surface->CreateWatcher() : nullptr; + } + + if (watcher && !watcher->IsValid()) { + auto level_surface = watcher->Get(); + if (!level_surface->invalid_regions.empty()) { + ValidateSurface(level_surface, level_surface->addr, level_surface->size); + } + + if (!surface->is_custom /*&& texture_filterer->IsNull()*/) { + level_surface->texture->BlitTo(surface->texture, level_surface->GetScaledRect(), + surface_params.GetScaledRect(), 0, level); + } + + watcher->Validate(); + } + } + } + + return surface; +} + +const CachedTextureCube& RasterizerCache::GetTextureCube(const TextureCubeConfig& config) { + auto& cube = texture_cube_cache[config]; + + struct Face { + Face(std::shared_ptr& watcher, PAddr address, CubeFace face) + : watcher(watcher), address(address), face(face) {} + + std::shared_ptr& watcher; + PAddr address; + CubeFace face; + }; + + const std::array faces = {{ + {cube.px, config.px, CubeFace::PositiveX}, + {cube.nx, config.nx, CubeFace::NegativeX}, + {cube.py, config.py, CubeFace::PositiveY}, + {cube.ny, config.ny, CubeFace::NegativeY}, + {cube.pz, config.pz, CubeFace::PositiveZ}, + {cube.nz, config.nz, CubeFace::NegativeZ}, + }}; + + for (const Face& face : faces) { + if (!face.watcher || !face.watcher->Get()) { + Pica::Texture::TextureInfo info; + info.physical_address = face.address; + info.height = info.width = config.width; + info.format = config.format; + info.SetDefaultStride(); + auto surface = GetTextureSurface(info); + if (surface) { + face.watcher = surface->CreateWatcher(); + } else { + // Can occur when texture address is invalid. We mark the watcher with nullptr + // in this case and the content of the face wouldn't get updated. These are + // usually leftover setup in the texture unit and games are not supposed to draw + // using them. 
+ face.watcher = nullptr; + } + } + } + + const u16 scaled_size = cube.res_scale * config.width; + if (!cube.texture.IsValid()) { + for (const Face& face : faces) { + if (face.watcher) { + auto surface = face.watcher->Get(); + cube.res_scale = std::max(cube.res_scale, surface->res_scale); + } + } + + TextureInfo texture_info = { + .width = scaled_size, + .height = scaled_size, + .type = TextureType::Texture2D, + .view_type = TextureViewType::ViewCube, + .format = GetTextureFormat(CachedSurface::PixelFormatFromTextureFormat(config.format)) + }; + + texture_info.UpdateMipLevels(); + cube.texture = AllocateSurfaceTexture(texture_info); + } + + // Validate and gather all the cube faces + for (const Face& face : faces) { + if (face.watcher && !face.watcher->IsValid()) { + auto surface = face.watcher->Get(); + if (!surface->invalid_regions.empty()) { + ValidateSurface(surface, surface->addr, surface->size); + } + + auto src_rect = surface->GetScaledRect(); + auto dst_rect = Common::Rectangle{0, scaled_size, scaled_size, 0}; + surface->texture->BlitTo(cube.texture, src_rect, dst_rect, 0, 0, 0, static_cast(face.face)); + face.watcher->Validate(); + } + } + + return cube; +} + +SurfaceSurfaceRect_Tuple RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const Common::Rectangle& viewport_rect) { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // update resolution_scale_factor and reset cache if changed + /*const bool scale_factor_changed = resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); + if (scale_factor_changed | (VideoCore::g_texture_filter_update_requested.exchange(false) && + texture_filterer->Reset(Settings::values.texture_filter_name, resolution_scale_factor))) { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + texture_cube_cache.clear(); + }*/ 
+ + Common::Rectangle viewport_clamped{ + static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + Common::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + Common::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + Common::Rectangle fb_rect{}; + if (color_surface != nullptr 
&& depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + color_surface->InvalidateAllWatcher(); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + depth_surface->InvalidateAllWatcher(); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); +} + +Surface RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { + Surface new_surface = std::make_shared(*this); + + new_surface->addr = config.GetStartAddress(); + new_surface->end = config.GetEndAddress(); + new_surface->size = new_surface->end - new_surface->addr; + new_surface->type = SurfaceType::Fill; + new_surface->res_scale = std::numeric_limits::max(); + + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; + } + + RegisterSurface(new_surface); + return new_surface; +} + +SurfaceRect_Tuple RasterizerCache::GetTexCopySurface(const SurfaceParams& params) { + Common::Rectangle rect{}; + + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + 
ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect; + if (params.width != params.stride) { + const u32 tiled_size = match_surface->is_tiled ? 8 : 1; + match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; + } else { + match_subrect = match_surface->FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); + } + + rect = match_surface->GetScaledSubRect(match_subrect); + } + + return std::make_tuple(match_surface, rect); +} + +void RasterizerCache::DuplicateSurface(const Surface& src_surface, + const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, + dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (const auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) + return; + + const SurfaceInterval validate_interval(addr, addr + size); + + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + ASSERT(surface->IsRegionValid(validate_interval)); + return; + } + + auto validate_regions = surface->invalid_regions & validate_interval; + auto notify_validated = [&](SurfaceInterval interval) { + surface->invalid_regions.erase(interval); + validate_regions.erase(interval); + }; + + while (true) 
{ + const auto it = validate_regions.begin(); + if (it == validate_regions.end()) + break; + + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); + + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); + CopySurface(copy_surface, surface, copy_interval); + notify_validated(copy_interval); + continue; + } + + // Try to find surface in cache with different format + // that can can be reinterpreted to the requested format. + if (ValidateByReinterpretation(surface, params, interval)) { + notify_validated(interval); + continue; + } + // Could not find a matching reinterpreter, check if we need to implement a + // reinterpreter + if (NoUnimplementedReinterpretations(surface, params, interval) && + !IntervalHasInvalidPixelFormat(params, interval)) { + // No surfaces were found in the cache that had a matching bit-width. + // If the region was created entirely on the GPU, + // assume it was a developer mistake and skip flushing. + if (boost::icl::contains(dirty_regions, interval)) { + LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " + "invalid. 
Skipping validation"); + validate_regions.erase(interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + surface->LoadBuffer(params.addr, params.end); + surface->UploadTexture(surface->GetSubRect(params)); + notify_validated(params.GetInterval()); + } +} + +bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, + const SurfaceInterval& interval) { + static constexpr std::array all_formats{ + PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, + PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, + PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, + PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, + PixelFormat::D24S8, + }; + bool implemented = true; + for (PixelFormat format : all_formats) { + if (SurfaceParams::GetFormatBpp(format) == surface->GetFormatBpp()) { + params.pixel_format = format; + // This could potentially be expensive, + // although experimentally it hasn't been too bad + Surface test_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (test_surface != nullptr) { + LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", + SurfaceParams::PixelFormatAsString(format), + SurfaceParams::PixelFormatAsString(surface->pixel_format)); + implemented = false; + } + } + } + return implemented; +} + +bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval) { + params.pixel_format = PixelFormat::Invalid; + for (const auto& set : RangeFromInterval(surface_cache, interval)) + for (const auto& surface : set.second) + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_WARNING(Render_OpenGL, "Surface found with invalid pixel format"); + return true; + } + return false; +} + +bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, + 
const SurfaceInterval& interval) { + auto [cvt_begin, cvt_end] = + format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format); + for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) { + PixelFormat format = reinterpreter->first.src_format; + params.pixel_format = format; + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + + if (reinterpret_surface != nullptr) { + SurfaceInterval reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + SurfaceParams reinterpret_params = surface->FromInterval(reinterpret_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); + auto dest_rect = surface->GetScaledSubRect(reinterpret_params); + + if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 && + surface->res_scale == resolution_scale_factor) { + // The destination surface is either a framebuffer, or a filtered texture. + // Create an intermediate surface to convert to before blitting to the + // destination. 
+ Common::Rectangle tmp_rect{0, dest_rect.GetHeight() / resolution_scale_factor, + dest_rect.GetWidth() / resolution_scale_factor, 0}; + OGLTexture tmp_tex = AllocateSurfaceTexture( + GetFormatTuple(reinterpreter->first.dst_format), tmp_rect.right, tmp_rect.top); + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, tmp_tex.handle, + tmp_rect, draw_framebuffer.handle); + SurfaceParams::SurfaceType type = + SurfaceParams::GetFormatType(reinterpreter->first.dst_format); + + if (!texture_filterer->Filter(tmp_tex.handle, tmp_rect, surface->texture.handle, + dest_rect, type, read_framebuffer.handle, + draw_framebuffer.handle)) { + BlitTextures(tmp_tex.handle, tmp_rect, surface->texture.handle, dest_rect, type, + read_framebuffer.handle, draw_framebuffer.handle); + } + } else { + reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect, + read_framebuffer.handle, surface->texture.handle, + dest_rect, draw_framebuffer.handle); + } + return true; + } + } + return false; +} + +void RasterizerCache::ClearAll(bool flush) { + const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF); + // Force flush all surfaces from the cache + if (flush) { + FlushRegion(0x0, 0xFFFFFFFF); + } + // Unmark all of the marked pages + for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) { + const auto interval = pair.first & flush_interval; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + } + + // Remove the whole cache without really looking at it. 
+ cached_pages -= flush_interval; + dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); + surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); + remove_surfaces.clear(); +} + +void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadTexture(surface->GetSubRect(params)); + } + + surface->FlushBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + flushed_intervals += interval; + } + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + +void RasterizerCache::FlushAll() { + FlushRegion(0, 0xFFFFFFFF); +} + +void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval invalid_interval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); + 
region_owner->invalid_regions.erase(invalid_interval); + } + + for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (const auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + cached_surface->InvalidateAllWatcher(); + + // If the surface has no salvageable data it should be removed from the cache to avoid + // clogging the data structure + if (cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } + } + } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (const auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +Surface RasterizerCache::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared(*this); + static_cast(*surface) = params; + + surface->invalid_regions.insert(surface->GetInterval()); + + TextureInfo texture_info = { + .width = static_cast(surface->GetScaledWidth()), + .height = static_cast(surface->GetScaledHeight()), + .type = TextureType::Texture2D, + .view_type = TextureViewType::View2D, + .format = 
GetTextureFormat(surface->pixel_format) + }; + + surface->texture = AllocateSurfaceTexture(texture_info); + return surface; +} + +void RasterizerCache::RegisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCache::UnregisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (!surface->registered) { + return; + } + surface->registered = false; + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCache::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u32 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + true); + else if (delta < 0 && count == -delta) + VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, + false); + else + 
ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); +} + +} // namespace VideoCore diff --git a/src/video_core/common/rasterizer_cache.h b/src/video_core/common/rasterizer_cache.h new file mode 100644 index 000000000..83a452690 --- /dev/null +++ b/src/video_core/common/rasterizer_cache.h @@ -0,0 +1,316 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "common/assert.h" +#include "common/math_util.h" +#include "core/custom_tex_cache.h" +#include "video_core/common/surface_params.h" +#include "video_core/common/texture.h" +#include "video_core/common/framebuffer.h" +#include "video_core/texture/texture_decode.h" + +namespace VideoCore { + +class RasterizerCache; +class TextureFilterer; + +enum class CubeFace { + PositiveX, + NegativeX, + PositiveY, + NegativeY, + PositiveZ, + NegativeZ, +}; + +struct TextureCubeConfig { + PAddr px; + PAddr nx; + PAddr py; + PAddr ny; + PAddr pz; + PAddr nz; + u32 width; + Pica::TexturingRegs::TextureFormat format; + + auto operator <=>(const TextureCubeConfig& other) const = default; +}; + +using SurfaceSet = std::set; + +using SurfaceRegions = boost::icl::interval_set; + +using SurfaceMap = boost::icl::interval_map; + +using SurfaceCache = boost::icl::interval_map; + +static_assert(std::is_same() && + std::is_same(), + "Incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple>; +using SurfaceSurfaceRect_Tuple = std::tuple>; + +using PageMap = boost::icl::interval_map; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +/** + * A watcher that notifies whether a cached surface has been changed. This is useful for caching + * surface collection objects, including texture cube and mipmap. 
+ */ +struct SurfaceWatcher { +public: + explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} + + // Checks whether the surface has been changed. + bool IsValid() const { + return !surface.expired() && valid; + } + + // Marks that the content of the referencing surface has been updated to the watcher user. + void Validate() { + ASSERT(!surface.expired()); + valid = true; + } + + // Gets the referencing surface. Returns null if the surface has been destroyed + Surface Get() const { + return surface.lock(); + } + +private: + friend struct CachedSurface; + std::weak_ptr surface; + bool valid = false; +}; + +class RasterizerCache; + +struct CachedSurface : SurfaceParams, std::enable_shared_from_this { + CachedSurface(RasterizerCache& owner) : owner{owner} {} + ~CachedSurface(); + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadBuffer(PAddr load_start, PAddr load_end); + void FlushBuffer(PAddr flush_start, PAddr flush_end); + + // Custom texture loading and dumping + bool LoadCustomTexture(u64 tex_hash); + //void DumpTexture(GLuint target_tex, u64 tex_hash); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadTexture(Common::Rectangle rect); + void DownloadTexture(const Common::Rectangle& rect); + + void Fill(Common::Rectangle rect, const u8* data); + + // Queries the surface for the fill/copy capability + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsSurfaceFullyInvalid() const { + const auto range = invalid_regions.equal_range(GetInterval()); // invalid segments overlapping this surface + return range.first != range.second && *range.first == GetInterval(); // empty range: nothing is invalid and first would be end(), so never dereference it + } + + std::shared_ptr CreateWatcher() { + auto watcher = std::make_shared(weak_from_this()); + watchers.push_front(watcher); + return watcher; + } + +
void InvalidateAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + } + } + } + + void UnlinkAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + locked->surface.reset(); + } + } + watchers.clear(); + } + + static constexpr unsigned int GetBytesPerPixel(PixelFormat format) { + // D24 is almost always 4 byte aligned + return format == PixelFormat::Invalid + ? 0 + : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) + ? 4 + : SurfaceParams::GetFormatBpp(format) / 8; + } + +public: + bool registered = false; + SurfaceRegions invalid_regions; + + u32 fill_size = 0; // Number of bytes to read from fill_data + std::array fill_data; + TextureHandle texture; + std::vector gl_buffer; + + // max mipmap level that has been attached to the texture + u32 max_level = 0; + + // level_watchers[i] watches the (i+1)-th level mipmap source surface + std::array, 7> level_watchers; + + bool is_custom = false; + Core::CustomTexInfo custom_tex_info; + +private: + RasterizerCache& owner; + std::list> watchers; +}; + +struct CachedTextureCube { + TextureHandle texture; + u16 res_scale = 1; + std::shared_ptr px, nx, py, ny, pz, nz; +}; + +class BackendBase; + +class RasterizerCache { +public: + RasterizerCache(std::unique_ptr& backend); + ~RasterizerCache(); + + // Allocates a 2D texture for a surface possibly reusing an existing one + TextureHandle AllocateSurfaceTexture(const TextureInfo& info); + + // Defers destruction of texture handle in case of reuse + void RecycleTexture(TextureHandle&& handle); + + // Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, + const Surface& dst_surface, const Common::Rectangle& dst_rect); + + // Copy one surface's region to another + void CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval 
copy_interval); + + // Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + // Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from + // 3DS memory to OpenGL and caches it (if not already cached) + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + // Get a surface based on the texture configuration + Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config); + Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); + + // Get a texture cube based on the texture configuration + const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config); + + // Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const Common::Rectangle& viewport_rect); + + // Get a surface that matches the fill config + Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config); + + // Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + + // Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr); + + // Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner); + + // Flush all cached resources tracked by this cache manager + void FlushAll(); + + // Clear all cached resources tracked by this cache manager + void ClearAll(bool flush); + +private: + void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); + + /// Update surface's texture for given region when necessary + void 
ValidateSurface(const Surface& surface, PAddr addr, u32 size); + + // Returns false if there is a surface in the cache at the interval with the same bit-width, + bool NoUnimplementedReinterpretations(const Surface& surface, + SurfaceParams& params, + const SurfaceInterval& interval); + + // Return true if a surface with an invalid pixel format exists at the interval + bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); + + // Attempt to find a reinterpretable surface in the cache and use it to copy for validation + bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, + const SurfaceInterval& interval); + + // Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + // Register surface into the cache + void RegisterSurface(const Surface& surface); + + // Remove surface from the cache + void UnregisterSurface(const Surface& surface); + + // Increase/decrease the number of surface in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); + + // Fills the entire or part of a surface with the provided color/depth data + bool FillSurface(const Surface& surface, const u8* fill_data, Common::Rectangle fill_rect); + +private: + // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead + std::unordered_multimap host_texture_recycler; + std::recursive_mutex mutex; + + // Separate cache for texture cubes + std::unordered_map texture_cube_cache; + + // Cached surfaces + SurfaceCache surface_cache; + PageMap cached_pages; + SurfaceMap dirty_regions; + SurfaceSet remove_surfaces; + u16 resolution_scale_factor; + + // Keeping a framebuffer cache is both useful for storing render targets but also + // for accelerating texture clear operations + std::unordered_map framebuffer_cache; + +public: + std::unique_ptr& backend; + std::unique_ptr texture_filterer; + //std::unique_ptr format_reinterpreter; + 
//std::unique_ptr texture_downloader_es; +}; + +} // namespace VideoCore diff --git a/src/video_core/common/surface_params.cpp b/src/video_core/common/surface_params.cpp new file mode 100644 index 000000000..01bc08097 --- /dev/null +++ b/src/video_core/common/surface_params.cpp @@ -0,0 +1,171 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/alignment.h" +#include "video_core/common/rasterizer_cache.h" +#include "video_core/common/surface_params.h" + +namespace VideoCore { + +SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { + SurfaceParams params = *this; + const u32 tiled_size = is_tiled ? 8 : 1; + const u32 stride_tiled_bytes = BytesInPixels(stride * tiled_size); + PAddr aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); + PAddr aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); + + if (aligned_end - aligned_start > stride_tiled_bytes) { + params.addr = aligned_start; + params.height = (aligned_end - aligned_start) / BytesInPixels(stride); + } else { + // 1 row + ASSERT(aligned_end - aligned_start == stride_tiled_bytes); + const u32 tiled_alignment = BytesInPixels(is_tiled ? 
8 * 8 : 1); + aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); + aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); + params.addr = aligned_start; + params.width = PixelsInBytes(aligned_end - aligned_start) / tiled_size; + params.stride = params.width; + params.height = tiled_size; + } + params.UpdateParams(); + + return params; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle unscaled_rect) const { + if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { + return {}; + } + + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; + unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; + unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; + } + + const u32 stride_tiled = !is_tiled ? stride : stride * 8; + + const u32 pixel_offset = + stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + + unscaled_rect.left; + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); + + return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; +} + +SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { + SurfaceInterval result{}; + const auto valid_regions = + SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; + for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1)), + addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, + BytesInPixels(is_tiled ? 8 * 8 : 1))}; + + if (BytesInPixels(is_tiled ? 
8 * 8 : 1) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); + SurfaceInterval rect_interval{ + addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), + addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), + }; + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + +Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); + + if (is_tiled) { + const int x0 = (begin_pixel_index % (stride * 8)) / 8; + const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom + return Common::Rectangle(x0, height - y0, x0 + sub_surface.width, + height - (y0 + sub_surface.height)); + } + + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + // Bottom to top + return Common::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); +} + +Common::Rectangle SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + 
rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return std::tie(other_surface.addr, other_surface.width, other_surface.height, + other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == + std::tie(addr, width, height, stride, pixel_format, is_tiled) && + pixel_format != PixelFormat::Invalid; +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + return sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && + GetSubRect(sub_surface).right <= stride; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && + addr <= expanded_surface.end && expanded_surface.addr <= end && + is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % + BytesInPixels(stride * (is_tiled ? 8 : 1)) == + 0; +} + +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { + if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || + end < texcopy_params.end) { + return false; + } + if (texcopy_params.width != texcopy_params.stride) { + const u32 tile_stride = BytesInPixels(stride * (is_tiled ? 8 : 1)); + return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 && + (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && + ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; + } + return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); +} + +} // namespace VideoCore diff --git a/src/video_core/common/surface_params.h b/src/video_core/common/surface_params.h new file mode 100644 index 000000000..53f2d842e --- /dev/null +++ b/src/video_core/common/surface_params.h @@ -0,0 +1,270 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/assert.h" +#include "common/math_util.h" +#include "core/hw/gpu.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_texturing.h" + +namespace VideoCore { + +struct CachedSurface; +using Surface = std::shared_ptr; + +using SurfaceInterval = boost::icl::right_open_interval; + +struct SurfaceParams { +private: + static constexpr std::array BPP_TABLE = { + 32, // RGBA8 + 24, // RGB8 + 16, // RGB5A1 + 16, // RGB565 + 16, // RGBA4 + 16, // IA8 + 16, // RG8 + 8, // I8 + 8, // A8 + 8, // IA4 + 4, // I4 + 4, // A4 + 4, // ETC1 + 8, // ETC1A4 + 16, // D16 + 0, + 24, // D24 + 32, // D24S8 + }; + +public: + enum class PixelFormat { + // First 5 formats are shared between textures and color buffers + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + + // Texture-only formats + IA8 = 5, + RG8 = 6, + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, + ETC1A4 = 13, + + // Depth buffer-only formats + D16 = 14, + // gap + D24 = 16, + D24S8 = 17, + + Invalid = 255, + }; + + enum class SurfaceType { + Color = 0, + Texture = 1, + Depth = 2, + DepthStencil = 3, + Fill = 4, + Invalid = 5 + }; + + static constexpr unsigned int GetFormatBpp(PixelFormat format) { + const auto format_idx = static_cast(format); + 
DEBUG_ASSERT_MSG(format_idx < BPP_TABLE.size(), "Invalid pixel format {}", format_idx); + return format_idx < BPP_TABLE.size() ? BPP_TABLE[format_idx] : 0; // the debug assert presumably compiles out in release builds; never index past the table (PixelFormat::Invalid is 255) + } + + unsigned int GetFormatBpp() const { + return GetFormatBpp(pixel_format); + } + + static std::string_view PixelFormatAsString(PixelFormat format) { + switch (format) { + case PixelFormat::RGBA8: + return "RGBA8"; + case PixelFormat::RGB8: + return "RGB8"; + case PixelFormat::RGB5A1: + return "RGB5A1"; + case PixelFormat::RGB565: + return "RGB565"; + case PixelFormat::RGBA4: + return "RGBA4"; + case PixelFormat::IA8: + return "IA8"; + case PixelFormat::RG8: + return "RG8"; + case PixelFormat::I8: + return "I8"; + case PixelFormat::A8: + return "A8"; + case PixelFormat::IA4: + return "IA4"; + case PixelFormat::I4: + return "I4"; + case PixelFormat::A4: + return "A4"; + case PixelFormat::ETC1: + return "ETC1"; + case PixelFormat::ETC1A4: + return "ETC1A4"; + case PixelFormat::D16: + return "D16"; + case PixelFormat::D24: + return "D24"; + case PixelFormat::D24S8: + return "D24S8"; + default: + return "Not a real pixel format"; + } + } + + static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { + return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) { + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) { + return ((unsigned int)format < 4) ?
(PixelFormat)((unsigned int)format + 14) + : PixelFormat::Invalid; + } + + static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { + switch (format) { + // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat + case GPU::Regs::PixelFormat::RGB565: + return PixelFormat::RGB565; + case GPU::Regs::PixelFormat::RGB5A1: + return PixelFormat::RGB5A1; + default: + return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; + } + } + + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { + SurfaceType a_type = GetFormatType(pixel_format_a); + SurfaceType b_type = GetFormatType(pixel_format_b); + + if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && + (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + return true; + } + + if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { + return true; + } + + if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { + return true; + } + + return false; + } + + static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { + if ((unsigned int)pixel_format < 5) { + return SurfaceType::Color; + } + + if ((unsigned int)pixel_format < 14) { + return SurfaceType::Texture; + } + + if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { + return SurfaceType::Depth; + } + + if (pixel_format == PixelFormat::D24S8) { + return SurfaceType::DepthStencil; + } + + return SurfaceType::Invalid; + } + + /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" + /// and "pixel_format" + void UpdateParams() { + if (stride == 0) { + stride = width; + } + type = GetFormatType(pixel_format); + size = !is_tiled ? 
BytesInPixels(stride * (height - 1) + width) + : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); + end = addr + size; + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval(addr, end); + } + + // Returns the outer rectangle containing "interval" + SurfaceParams FromInterval(SurfaceInterval interval) const; + + SurfaceInterval GetSubRectInterval(Common::Rectangle unscaled_rect) const; + + // Returns the region of the biggest valid rectangle within interval + SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; + + u32 GetScaledWidth() const { + return width * res_scale; + } + + u32 GetScaledHeight() const { + return height * res_scale; + } + + Common::Rectangle GetRect() const { + return {0, height, width, 0}; + } + + Common::Rectangle GetScaledRect() const { + return {0, GetScaledHeight(), GetScaledWidth(), 0}; + } + + u32 PixelsInBytes(u32 size) const { + return GetFormatBpp() != 0 ? size * CHAR_BIT / GetFormatBpp() : 0; // BPP_TABLE holds 0 for the unused format slot (index 15): report 0 pixels instead of dividing by zero + } + + u32 BytesInPixels(u32 pixels) const { + return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + Common::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; + Common::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; + + PAddr addr = 0; + PAddr end = 0; + u32 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; + u16 res_scale = 1; + + bool is_tiled = false; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +} // namespace VideoCore diff --git a/src/video_core/common/texture.h b/src/video_core/common/texture.h index 2a5bf9ce7..2bc9f4b6d 100644 --- a/src/video_core/common/texture.h +++ b/src/video_core/common/texture.h @@ -7,6 +7,7 @@ #include #include "common/hash.h"
#include "common/intrusive_ptr.h" +#include "common/math_util.h" #include "video_core/regs_texturing.h" namespace VideoCore { @@ -50,6 +51,12 @@ enum class TextureViewType : u8 { * @param width, height are the extent of the rectangle */ struct Rect2D { + Rect2D() = default; + Rect2D(s32 x, s32 y, u32 width, u32 height) : + x(x), y(y), width(width), height(height) {} + Rect2D(Common::Rectangle rect) : + x(rect.left), y(rect.bottom), width(rect.GetWidth()), height(rect.GetHeight()) {} + s32 x = 0; s32 y = 0; u32 width = 0; @@ -67,6 +74,11 @@ struct TextureInfo { TextureViewType view_type = TextureViewType::Undefined; TextureFormat format = TextureFormat::Undefined; + void UpdateMipLevels() { + levels = 1; // full mip chain is floor(log2(largest extent)) + 1; integer math avoids the float round trip and keeps a zero extent at 1 level + for (u32 extent = std::max(width, height); extent > 1; extent >>= 1) ++levels; + } + const u64 Hash() const { return Common::ComputeStructHash64(*this); } @@ -84,39 +96,52 @@ public: TextureBase(const TextureInfo& info) : info(info) {} virtual ~TextureBase() = default; - /// Uploads pixel data to the GPU memory + // Disable copy constructor + TextureBase(const TextureBase&) = delete; + TextureBase& operator=(const TextureBase&) = delete; + + // Uploads pixel data to the GPU memory virtual void Upload(Rect2D rectangle, u32 stride, std::span data, u32 level = 0) {}; - /// Downloads pixel data from GPU memory + // Downloads pixel data from GPU memory virtual void Download(Rect2D rectangle, u32 stride, std::span data, u32 level = 0) {}; - /// Copies the rectangle area specified to the destionation texture - virtual void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect, - u32 src_level = 0, u32 dest_level = 0) {}; + // Copies the rectangle area specified to the destination texture + virtual void BlitTo(TextureHandle dest, Rect2D source_rect, Rect2D dest_rect, + u32 src_level = 0, u32 dest_level = 0, + u32 src_layer = 0, u32 dest_layer = 0) {}; - /// Returns the unique texture identifier + // Generates all possible mipmaps from the texture + virtual void GenerateMipmaps() {}; + + // Returns the
texture info structure + TextureInfo GetInfo() const { + return info; + } + + // Returns the unique texture identifier const u64 GetHash() const { return info.Hash(); } - /// Returns the width of the texture + // Returns the width of the texture u16 GetWidth() const { return info.width; } - /// Returns the height of the texture + // Returns the height of the texture u16 GetHeight() const { return info.height; } - /// Returns the number of mipmap levels allocated + // Returns the number of mipmap levels allocated u16 GetMipLevels() const { return info.levels; } - /// Returns the pixel format + // Returns the pixel format TextureFormat GetFormat() const { return info.format; } @@ -146,6 +170,10 @@ public: SamplerBase(SamplerInfo info) : info(info) {} virtual ~SamplerBase() = default; + // Disable copy constructor + SamplerBase(const SamplerBase&) = delete; + SamplerBase& operator=(const SamplerBase&) = delete; + protected: SamplerInfo info{}; }; diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 60b0af059..2b79953ee 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -10,20 +10,20 @@ namespace Pica { template -PrimitiveAssembler::PrimitiveAssembler(PipelineRegs::TriangleTopology topology) +PrimitiveAssembler::PrimitiveAssembler(Pica::TriangleTopology topology) : topology(topology) {} template void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, const TriangleHandler& triangle_handler) { switch (topology) { - case PipelineRegs::TriangleTopology::List: - case PipelineRegs::TriangleTopology::Shader: + case Pica::TriangleTopology::List: + case Pica::TriangleTopology::Shader: if (buffer_index < 2) { buffer[buffer_index++] = vtx; } else { buffer_index = 0; - if (topology == PipelineRegs::TriangleTopology::Shader && winding) { + if (topology == Pica::TriangleTopology::Shader && winding) { triangle_handler(buffer[1], buffer[0], vtx); winding = false; } else { @@ -32,8 +32,8 @@ 
void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, } break; - case PipelineRegs::TriangleTopology::Strip: - case PipelineRegs::TriangleTopology::Fan: + case Pica::TriangleTopology::Strip: + case Pica::TriangleTopology::Fan: if (strip_ready) triangle_handler(buffer[0], buffer[1], vtx); @@ -41,9 +41,9 @@ void PrimitiveAssembler::SubmitVertex(const VertexType& vtx, strip_ready |= (buffer_index == 1); - if (topology == PipelineRegs::TriangleTopology::Strip) + if (topology == Pica::TriangleTopology::Strip) buffer_index = !buffer_index; - else if (topology == PipelineRegs::TriangleTopology::Fan) + else if (topology == Pica::TriangleTopology::Fan) buffer_index = 1; break; @@ -66,7 +66,7 @@ void PrimitiveAssembler::Reset() { } template -void PrimitiveAssembler::Reconfigure(PipelineRegs::TriangleTopology topology) { +void PrimitiveAssembler::Reconfigure(Pica::TriangleTopology topology) { Reset(); this->topology = topology; } @@ -77,7 +77,7 @@ bool PrimitiveAssembler::IsEmpty() const { } template -PipelineRegs::TriangleTopology PrimitiveAssembler::GetTopology() const { +Pica::TriangleTopology PrimitiveAssembler::GetTopology() const { return topology; } diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index c98976a7d..e4689aed5 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -21,8 +21,7 @@ struct PrimitiveAssembler { using TriangleHandler = std::function; - explicit PrimitiveAssembler( - PipelineRegs::TriangleTopology topology = PipelineRegs::TriangleTopology::List); + explicit PrimitiveAssembler(Pica::TriangleTopology topology = Pica::TriangleTopology::List); /* * Queues a vertex, builds primitives from the vertex queue according to the given @@ -46,7 +45,7 @@ struct PrimitiveAssembler { /** * Reconfigures the PrimitiveAssembler to use a different triangle topology. 
*/ - void Reconfigure(PipelineRegs::TriangleTopology topology); + void Reconfigure(Pica::TriangleTopology topology); /** * Returns whether the PrimitiveAssembler has an empty internal buffer. @@ -56,10 +55,10 @@ struct PrimitiveAssembler { /** * Returns the current topology. */ - PipelineRegs::TriangleTopology GetTopology() const; + Pica::TriangleTopology GetTopology() const; private: - PipelineRegs::TriangleTopology topology; + Pica::TriangleTopology topology; int buffer_index = 0; std::array buffer; diff --git a/src/video_core/renderer_vulkan/vk_backend.cpp b/src/video_core/renderer_vulkan/vk_backend.cpp index 5200f7bcb..a2073cd9d 100644 --- a/src/video_core/renderer_vulkan/vk_backend.cpp +++ b/src/video_core/renderer_vulkan/vk_backend.cpp @@ -8,9 +8,38 @@ #include "video_core/renderer_vulkan/vk_backend.h" #include "video_core/renderer_vulkan/vk_buffer.h" #include "video_core/renderer_vulkan/vk_texture.h" +#include "video_core/renderer_vulkan/vk_framebuffer.h" namespace VideoCore::Vulkan { +constexpr vk::PipelineBindPoint ToVkPipelineBindPoint(PipelineType type) { + switch (type) { + case PipelineType::Graphics: + return vk::PipelineBindPoint::eGraphics; + case PipelineType::Compute: + return vk::PipelineBindPoint::eCompute; + } +} + +constexpr vk::Rect2D ToVkRect2D(Rect2D rect) { + return vk::Rect2D{ + .offset = vk::Offset2D{rect.x, rect.y}, + .extent = vk::Extent2D{rect.width, rect.height} + }; +} + +constexpr vk::IndexType ToVkIndexType(AttribType type) { + switch (type) { + case AttribType::Short: + return vk::IndexType::eUint16; + case AttribType::Int: + return vk::IndexType::eUint32; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown index type {}!", type); + UNREACHABLE(); + } +} + Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window), instance(window), swapchain(instance, instance.GetSurface()), scheduler(instance) { @@ -24,6 +53,7 @@ Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window), // Pre-create all needed renderpasses by 
the renderer constexpr std::array color_formats = { + vk::Format::eUndefined, vk::Format::eR8G8B8A8Unorm, vk::Format::eR8G8B8Unorm, vk::Format::eR5G5B5A1UnormPack16, @@ -32,18 +62,41 @@ Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window), }; constexpr std::array depth_stencil_formats = { + vk::Format::eUndefined, vk::Format::eD16Unorm, vk::Format::eX8D24UnormPack32, vk::Format::eD24UnormS8Uint, }; // Create all required renderpasses - for (u32 color = 0; color < MAX_COLOR_FORMATS; color++) { - for (u32 depth = 0; depth < MAX_DEPTH_FORMATS; depth++) { + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { + for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { + if (color == 0 && depth == 0) continue; + u32 index = color * MAX_COLOR_FORMATS + depth; renderpass_cache[index] = CreateRenderPass(color_formats[color], depth_stencil_formats[depth]); } } + + constexpr std::array pool_sizes = { + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 2048}, + vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 1024} + }; + + const vk::DescriptorPoolCreateInfo pool_info = { + .maxSets = 2048, + .poolSizeCount = pool_sizes.size(), + .pPoolSizes = pool_sizes.data() + }; + + // Create descriptor pools + vk::Device device = instance.GetDevice(); + for (u32 pool = 0; pool < SCHEDULER_COMMAND_COUNT; pool++) { + descriptor_pools[pool] = device.createDescriptorPool(pool_info); + } } Backend::~Backend() { @@ -59,52 +112,118 @@ Backend::~Backend() { */ BufferHandle Backend::CreateBuffer(BufferInfo info) { static ObjectPool buffer_pool; - return IntrusivePtr{buffer_pool.Allocate(info)}; + return BufferHandle{buffer_pool.Allocate(instance, scheduler, info)}; } FramebufferHandle Backend::CreateFramebuffer(FramebufferInfo info) { + static 
ObjectPool framebuffer_pool; + + // Get renderpass + TextureFormat color = info.color.IsValid() ? info.color->GetFormat() : TextureFormat::Undefined; + TextureFormat depth = info.depth_stencil.IsValid() ? info.depth_stencil->GetFormat() : TextureFormat::Undefined; + vk::RenderPass renderpass = GetRenderPass(color, depth); + + return FramebufferHandle{framebuffer_pool.Allocate(instance, info, renderpass)}; } TextureHandle Backend::CreateTexture(TextureInfo info) { static ObjectPool texture_pool; - return IntrusivePtr{texture_pool.Allocate(info)}; + return TextureHandle{texture_pool.Allocate(instance, scheduler, info)}; } PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) { static ObjectPool pipeline_pool; - // Find a pipeline layout first + // Get renderpass + vk::RenderPass renderpass = GetRenderPass(info.color_attachment, info.depth_attachment); + + // Find a pipeline layout first if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) { PipelineLayout& layout = iter->second; - return IntrusivePtr{pipeline_pool.Allocate(instance, layout, type, info, cache)}; + return PipelineHandle{pipeline_pool.Allocate(instance, layout, type, info, renderpass, cache)}; } // Create the layout auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout}); - return IntrusivePtr{pipeline_pool.Allocate(instance, result.first->second, type, info, cache)}; + return PipelineHandle{pipeline_pool.Allocate(instance, result.first->second, type, info, renderpass, cache)}; } SamplerHandle Backend::CreateSampler(SamplerInfo info) { static ObjectPool sampler_pool; - return IntrusivePtr{sampler_pool.Allocate(info)}; + return SamplerHandle{sampler_pool.Allocate(info)}; } -void Backend::Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, +void Backend::Draw(PipelineHandle pipeline_handle, FramebufferHandle draw_framebuffer, BufferHandle vertex_buffer, u32 base_vertex, u32 num_vertices) { + // Bind 
descriptor sets vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + BindDescriptorSets(pipeline_handle); - Buffer* vertex = static_cast(vertex_buffer.Get()); - command_buffer.bindVertexBuffers(0, vertex->GetHandle(), {0}); + // Bind vertex buffer + const Buffer* vertex = static_cast(vertex_buffer.Get()); + command_buffer.bindVertexBuffers(0, vertex->GetHandle(), vertex->GetBindOffset()); + + // Begin renderpass + const Framebuffer* framebuffer = static_cast(draw_framebuffer.Get()); + const vk::RenderPassBeginInfo renderpass_begin = { + .renderPass = framebuffer->GetRenderpass(), + .framebuffer = framebuffer->GetHandle(), + .renderArea = ToVkRect2D(framebuffer->GetDrawRectangle()), + .clearValueCount = 0, + .pClearValues = nullptr + }; + + command_buffer.beginRenderPass(renderpass_begin, vk::SubpassContents::eInline); + + // Bind pipeline + const Pipeline* pipeline = static_cast(pipeline_handle.Get()); + command_buffer.bindPipeline(ToVkPipelineBindPoint(pipeline->GetType()), pipeline->GetHandle()); // Submit draw command_buffer.draw(num_vertices, 1, base_vertex, 0); + + // End renderpass + command_buffer.endRenderPass(); } -void Backend::DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, - BufferHandle vertex_buffer, BufferHandle index_buffer, +void Backend::DrawIndexed(PipelineHandle pipeline_handle, FramebufferHandle draw_framebuffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, AttribType index_type, u32 base_index, u32 num_indices, u32 base_vertex) { + // Bind descriptor sets + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + BindDescriptorSets(pipeline_handle); + + // Bind vertex buffer + const Buffer* vertex = static_cast(vertex_buffer.Get()); + command_buffer.bindVertexBuffers(0, vertex->GetHandle(), vertex->GetBindOffset()); + + // Bind index buffer + const Buffer* index = static_cast(index_buffer.Get()); + command_buffer.bindIndexBuffer(index->GetHandle(), index->GetBindOffset(), 
ToVkIndexType(index_type)); + + // Begin renderpass + const Framebuffer* framebuffer = static_cast(draw_framebuffer.Get()); + const vk::RenderPassBeginInfo renderpass_begin = { + .renderPass = framebuffer->GetRenderpass(), + .framebuffer = framebuffer->GetHandle(), + .renderArea = ToVkRect2D(framebuffer->GetDrawRectangle()), + .clearValueCount = 0, + .pClearValues = nullptr + }; + + command_buffer.beginRenderPass(renderpass_begin, vk::SubpassContents::eInline); + + // Bind pipeline + const Pipeline* pipeline = static_cast(pipeline_handle.Get()); + command_buffer.bindPipeline(ToVkPipelineBindPoint(pipeline->GetType()), pipeline->GetHandle()); + + // Submit draw + command_buffer.drawIndexed(num_indices, 1, base_index, base_vertex, 0); + + // End renderpass + command_buffer.endRenderPass(); } @@ -175,4 +294,39 @@ vk::RenderPass Backend::CreateRenderPass(vk::Format color, vk::Format depth) con return device.createRenderPass(renderpass_info); } +vk::RenderPass Backend::GetRenderPass(TextureFormat color, TextureFormat depth) const { + u32 color_index = color != TextureFormat::Undefined ? static_cast(color) + 1 : 0; + u32 depth_index = depth != TextureFormat::Undefined ? static_cast(depth) - 4 : 0; + return renderpass_cache[color_index * MAX_COLOR_FORMATS + depth_index]; +} + +void Backend::BindDescriptorSets(PipelineHandle handle) { + Pipeline* pipeline = static_cast(handle.Get()); + PipelineLayout& pipeline_layout = pipeline->GetOwner(); + + // Allocate required descriptor sets + // TODO: Maybe cache them? 
+ u32 pool_index = scheduler.GetCurrentSlotIndex(); + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = descriptor_pools[pool_index], + .descriptorSetCount = pipeline_layout.GetDescriptorSetLayoutCount(), + .pSetLayouts = pipeline_layout.GetDescriptorSetLayouts() + }; + + vk::Device device = instance.GetDevice(); + auto descriptor_sets = device.allocateDescriptorSets(alloc_info); + + // Write data to the descriptor sets + for (u32 set = 0; set < descriptor_sets.size(); set++) { + device.updateDescriptorSetWithTemplate(descriptor_sets[set], + pipeline_layout.GetUpdateTemplate(set), + pipeline_layout.GetData(set)); + } + + // Bind the descriptor sets + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + command_buffer.bindDescriptorSets(ToVkPipelineBindPoint(handle->GetType()), pipeline_layout.GetLayout(), + 0, descriptor_sets, {}); +} + } // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_backend.h b/src/video_core/renderer_vulkan/vk_backend.h index 2ca8b3b22..f2e9ffc24 100644 --- a/src/video_core/renderer_vulkan/vk_backend.h +++ b/src/video_core/renderer_vulkan/vk_backend.h @@ -15,7 +15,8 @@ namespace VideoCore::Vulkan { class Texture; -constexpr u32 RENDERPASS_COUNT = MAX_COLOR_FORMATS * MAX_DEPTH_FORMATS; +constexpr u32 RENDERPASS_COUNT = (MAX_COLOR_FORMATS + 1) * (MAX_DEPTH_FORMATS + 1); +constexpr u32 DESCRIPTOR_BANK_SIZE = 64; class Backend : public VideoCore::BackendBase { public: @@ -39,7 +40,7 @@ public: u32 base_vertex, u32 num_vertices) override; void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer, - BufferHandle vertex_buffer, BufferHandle index_buffer, + BufferHandle vertex_buffer, BufferHandle index_buffer, AttribType index_type, u32 base_index, u32 num_indices, u32 base_vertex) override; void DispatchCompute(PipelineHandle pipeline, Common::Vec3 groupsize, @@ -57,6 +58,10 @@ public: private: vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth) 
const; + vk::RenderPass GetRenderPass(TextureFormat color, TextureFormat depth) const; + + // Allocates and binds descriptor sets for the provided pipeline + void BindDescriptorSets(PipelineHandle pipeline); private: Instance instance; @@ -70,6 +75,9 @@ private: // Pipeline layout cache std::unordered_map pipeline_layouts; + + // Descriptor pools + std::array descriptor_pools; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_framebuffer.cpp b/src/video_core/renderer_vulkan/vk_framebuffer.cpp new file mode 100644 index 000000000..76d4b0393 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_framebuffer.cpp @@ -0,0 +1,97 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "video_core/renderer_vulkan/vk_framebuffer.h" +#include "video_core/renderer_vulkan/vk_texture.h" +#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace VideoCore::Vulkan { + +vk::Rect2D ToVkRect2D(Rect2D rect) { + return vk::Rect2D{ + .offset = {rect.x, rect.y}, + .extent = {rect.width, rect.height} + }; +} + +Framebuffer::Framebuffer(Instance& instance, CommandScheduler& scheduler, const FramebufferInfo& info, + vk::RenderPass load_renderpass, vk::RenderPass clear_renderpass) : + FramebufferBase(info), instance(instance), scheduler(scheduler), load_renderpass(load_renderpass), + clear_renderpass(clear_renderpass) { + + const Texture* color = static_cast(info.color.Get()); + const Texture* depth_stencil = static_cast(info.depth_stencil.Get()); + + u32 attachment_count = 0; + std::array attachments; + + if (color) { + attachments[attachment_count++] = color->GetView(); + } + + if (depth_stencil) { + attachments[attachment_count++] = depth_stencil->GetView(); + } + + const Texture* valid_texture = color ? 
color : depth_stencil; + const vk::FramebufferCreateInfo framebuffer_info = { + // The load and clear renderpass are compatible according to the specification + // so there is no need to create multiple framebuffers + .renderPass = load_renderpass, + .attachmentCount = attachments.size(), + .pAttachments = attachments.data(), + .width = valid_texture->GetWidth(), + .height = valid_texture->GetHeight(), + .layers = 1 + }; + + vk::Device device = instance.GetDevice(); + framebuffer = device.createFramebuffer(framebuffer_info); +} + +Framebuffer::~Framebuffer() { + vk::Device device = instance.GetDevice(); + device.destroyFramebuffer(framebuffer); +} + +void Framebuffer::DoClear(Common::Rectangle rect, Common::Vec4f color, float depth, u8 stencil) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + + u32 clear_value_count = 0; + std::array clear_values{}; + + if (info.color.IsValid()) { + vk::ClearColorValue clear_color{}; + std::memcpy(clear_color.float32.data(), color.AsArray(), sizeof(float) * 4); + + clear_values[clear_value_count++] = vk::ClearValue { + .color = clear_color + }; + } + + if (info.depth_stencil.IsValid()) { + clear_values[clear_value_count++] = vk::ClearValue { + .depthStencil = vk::ClearDepthStencilValue { + .depth = depth, + .stencil = stencil + } + }; + } + + const vk::RenderPassBeginInfo begin_info = { + .renderPass = clear_renderpass, + .framebuffer = framebuffer, + .renderArea = ToVkRect2D(rect), + .clearValueCount = clear_value_count, + .pClearValues = clear_values.data() + }; + + // Begin clear pass + command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); + command_buffer.endRenderPass(); +} + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_framebuffer.h b/src/video_core/renderer_vulkan/vk_framebuffer.h new file mode 100644 index 000000000..3151f014a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_framebuffer.h @@ -0,0 +1,38 @@ +// Copyright 2022 Citra 
Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/common/framebuffer.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore::Vulkan { + +class Instance; +class CommandScheduler; + +class Framebuffer : public VideoCore::FramebufferBase { +public: + Framebuffer(Instance& instance, CommandScheduler& scheduler, const FramebufferInfo& info, + vk::RenderPass load_renderpass, vk::RenderPass clear_renderpass); + ~Framebuffer() override; + + void DoClear(Common::Rectangle rect, Common::Vec4f color, float depth, u8 stencil) override; + + vk::Framebuffer GetHandle() const { + return framebuffer; + } + + vk::RenderPass GetLoadRenderpass() const { + return load_renderpass; + } + +private: + Instance& instance; + CommandScheduler& scheduler; + vk::Framebuffer framebuffer; + vk::RenderPass load_renderpass, clear_renderpass; +}; + +} // namespace VideoCore::Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.cpp b/src/video_core/renderer_vulkan/vk_pipeline.cpp index b5dd38ac1..4f9be4af8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline.cpp @@ -183,8 +183,8 @@ PipelineLayout::~PipelineLayout() { } } -Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, - PipelineInfo info, vk::PipelineCache cache) : PipelineBase(type, info), +Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, PipelineInfo info, + vk::RenderPass renderpass, vk::PipelineCache cache) : PipelineBase(type, info), instance(instance), owner(owner) { vk::Device device = instance.GetDevice(); @@ -331,7 +331,7 @@ Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, .layout = owner.GetLayout(), - .renderPass = {} + .renderPass = renderpass }; if (auto result = 
device.createGraphicsPipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h index 2489d38ec..549b0a71e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -38,11 +38,20 @@ public: update_data[set][binding] = data; } + // Returns the number of descriptor set layouts + u32 GetDescriptorSetLayoutCount() const { + return set_layout_count; + } + // Returns the most current descriptor update data std::span GetData(u32 set) { return std::span{update_data.at(set).data(), set_layout_count}; } + vk::DescriptorSetLayout* GetDescriptorSetLayouts() { + return set_layouts.data(); + } + // Returns the underlying vulkan pipeline layout handle vk::PipelineLayout GetLayout() const { return pipeline_layout; @@ -67,8 +76,8 @@ private: class Pipeline : public VideoCore::PipelineBase { public: - Pipeline(Instance& instance, PipelineLayout& owner, - PipelineType type, PipelineInfo info, vk::PipelineCache cache); + Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type, PipelineInfo info, + vk::RenderPass renderpass, vk::PipelineCache cache); ~Pipeline() override; void BindTexture(u32 group, u32 slot, TextureHandle handle) override; diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index a3a8631fa..af4807cbb 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -70,8 +70,9 @@ private: struct CommandSlot { bool use_upload_buffer = false; u64 fence_counter = 0; - vk::CommandBuffer render_command_buffer, upload_command_buffer; vk::Fence fence = VK_NULL_HANDLE; + vk::CommandBuffer render_command_buffer; + vk::CommandBuffer upload_command_buffer; std::unique_ptr upload_buffer; std::vector cleanups; }; diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp 
b/src/video_core/renderer_vulkan/vk_texture.cpp index 19cf64b59..57a7d29f7 100644 --- a/src/video_core/renderer_vulkan/vk_texture.cpp +++ b/src/video_core/renderer_vulkan/vk_texture.cpp @@ -76,8 +76,7 @@ inline vk::ImageViewType ToVkImageViewType(TextureViewType view_type) { Texture::Texture(Instance& instance, CommandScheduler& scheduler) : instance(instance), scheduler(scheduler) {} -Texture::Texture(Instance& instance, CommandScheduler& scheduler, - const TextureInfo& info) : TextureBase(info), +Texture::Texture(Instance& instance, CommandScheduler& scheduler, const TextureInfo& info) : TextureBase(info), instance(instance), scheduler(scheduler) { // Convert the input format to another that supports attachments @@ -122,10 +121,8 @@ Texture::Texture(Instance& instance, CommandScheduler& scheduler, image_view = device.createImageView(view_info); } -Texture::Texture(Instance& instance, CommandScheduler& scheduler, - vk::Image image, const TextureInfo& info) : TextureBase(info), - instance(instance), scheduler(scheduler), image(image), - is_texture_owned(false) { +Texture::Texture(Instance& instance, CommandScheduler& scheduler, vk::Image image, const TextureInfo& info) : + TextureBase(info), instance(instance), scheduler(scheduler), image(image), is_texture_owned(false) { const vk::ImageViewCreateInfo view_info = { .image = image, @@ -157,8 +154,7 @@ Texture::~Texture() { } } -void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, - u32 level, u32 level_count) { +void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, u32 level, u32 level_count) { ASSERT(level + level_count < TEXTURE_MAX_LEVELS); // Ensure all miplevels in the range have the same layout @@ -411,8 +407,101 @@ void Texture::Download(Rect2D rectangle, u32 stride, std::span data, u32 lev } } -StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler, - const TextureInfo& info) : +void Texture::BlitTo(TextureHandle dest, 
Rect2D source_rect, Rect2D dest_rect, u32 src_level, u32 dest_level, + u32 src_layer, u32 dest_layer) { + Texture* dest_texture = static_cast(dest.Get()); + + // Prepare images for transfer + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal); + dest_texture->Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal); + + const std::array source_offsets = { + vk::Offset3D{source_rect.x, source_rect.y, 0}, + vk::Offset3D{static_cast(source_rect.x + source_rect.width), + static_cast(source_rect.y + source_rect.height), 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{dest_rect.x, dest_rect.y, 0}, + vk::Offset3D{static_cast(dest_rect.x + dest_rect.width), + static_cast(dest_rect.y + dest_rect.height), 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = src_level, + .baseArrayLayer = src_layer, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = dest_texture->GetAspectFlags(), + .mipLevel = dest_level, + .baseArrayLayer = dest_layer, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal, + dest_texture->GetHandle(), vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eNearest); + + // Revert changes to the layout + Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); + dest_texture->Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal); +} + +// TODO: Use AMD single pass downsampler +void Texture::GenerateMipmaps() { + s32 current_width = info.width; + s32 current_height = info.height; + + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + for (u32 i = 1; i < info.levels; i++) { + Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, i - 1); + Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, i); + + const 
std::array source_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width, current_height, 1} + }; + + const std::array dest_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{current_width > 1 ? current_width / 2 : 1, + current_height > 1 ? current_height / 2 : 1, 1} + }; + + const vk::ImageBlit blit_area = { + .srcSubresource = { + .aspectMask = aspect, + .mipLevel = i - 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .srcOffsets = source_offsets, + .dstSubresource = { + .aspectMask = aspect, + .mipLevel = i, + .baseArrayLayer = 0, + .layerCount = 1 + }, + .dstOffsets = dest_offsets + }; + + command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal, + image, vk::ImageLayout::eTransferDstOptimal, + blit_area, vk::Filter::eLinear); + } + + // Prepare for shader reads + Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal, 0, info.levels); +} + +StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler, const TextureInfo& info) : TextureBase(info), instance(instance), scheduler(scheduler) { format = ToVkFormat(info.format); diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h index 4e53d83cd..149a40a29 100644 --- a/src/video_core/renderer_vulkan/vk_texture.h +++ b/src/video_core/renderer_vulkan/vk_texture.h @@ -24,33 +24,28 @@ public: Texture(Instance& instance, CommandScheduler& scheduler); // Constructor for texture creation - Texture(Instance& instance, CommandScheduler& scheduler, - const TextureInfo& info); + Texture(Instance& instance, CommandScheduler& scheduler, const TextureInfo& info); // Constructor for not owning textures (swapchain) - Texture(Instance& instance, CommandScheduler& scheduler, - vk::Image image, const TextureInfo& info); + Texture(Instance& instance, CommandScheduler& scheduler, vk::Image image, const TextureInfo& info); - ~Texture(); + ~Texture() override; - /// Uploads pixel data to the GPU memory - void Upload(Rect2D rectangle, 
u32 stride, std::span data, - u32 level = 0) override; + void Upload(Rect2D rectangle, u32 stride, std::span data, u32 level = 0) override; - /// Downloads pixel data from GPU memory - void Download(Rect2D rectangle, u32 stride, std::span data, - u32 level = 0) override; + void Download(Rect2D rectangle, u32 stride, std::span data, u32 level = 0) override; - /// Copies the rectangle area specified to the destionation texture - void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect, - u32 src_level = 0, u32 dest_level = 0) override; + void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect, u32 src_level = 0, + u32 dest_level = 0, u32 src_layer = 0, u32 dest_layer = 0) override; + + void GenerateMipmaps() override; /// Overrides the layout of provided image subresource void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1); /// Transitions part of the image to the provided layout - void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, - u32 level = 0, u32 level_count = 1); + void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout, u32 level = 0, + u32 level_count = 1); /// Returns the underlying vulkan image handle vk::Image GetHandle() const { @@ -68,6 +63,10 @@ public: return internal_format; } + vk::ImageAspectFlags GetAspectFlags() const { + return aspect; + } + /// Returns the current image layout vk::ImageLayout GetLayout(u32 level = 0) const { return layouts.at(level);