Merge pull request #9194 from FernandoS27/yfc-fermi2d
YFC - Fermi2D: Rework blit engine and add a software blitter.
This commit is contained in:
		@@ -28,6 +28,10 @@ add_library(video_core STATIC
 | 
			
		||||
    dirty_flags.h
 | 
			
		||||
    dma_pusher.cpp
 | 
			
		||||
    dma_pusher.h
 | 
			
		||||
    engines/sw_blitter/blitter.cpp
 | 
			
		||||
    engines/sw_blitter/blitter.h
 | 
			
		||||
    engines/sw_blitter/converter.cpp
 | 
			
		||||
    engines/sw_blitter/converter.h
 | 
			
		||||
    engines/const_buffer_info.h
 | 
			
		||||
    engines/engine_interface.h
 | 
			
		||||
    engines/engine_upload.cpp
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {
 | 
			
		||||
    ASSERT(memory_manager);
 | 
			
		||||
    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
 | 
			
		||||
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
 | 
			
		||||
    fermi_2d = std::make_unique<Engines::Fermi2D>();
 | 
			
		||||
    fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
 | 
			
		||||
    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
 | 
			
		||||
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
 | 
			
		||||
    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 | 
			
		||||
 
 | 
			
		||||
@@ -3,17 +3,25 @@
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
#include "video_core/engines/fermi_2d.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/engines/sw_blitter/blitter.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
#include "video_core/surface.h"
 | 
			
		||||
#include "video_core/textures/decoders.h"
 | 
			
		||||
 | 
			
		||||
MICROPROFILE_DECLARE(GPU_BlitEngine);
 | 
			
		||||
MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));
 | 
			
		||||
 | 
			
		||||
using VideoCore::Surface::BytesPerBlock;
 | 
			
		||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines {
 | 
			
		||||
 | 
			
		||||
Fermi2D::Fermi2D() {
 | 
			
		||||
using namespace Texture;
 | 
			
		||||
 | 
			
		||||
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
 | 
			
		||||
    sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
 | 
			
		||||
    // Nvidia's OpenGL driver seems to assume these values
 | 
			
		||||
    regs.src.depth = 1;
 | 
			
		||||
    regs.dst.depth = 1;
 | 
			
		||||
@@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Fermi2D::Blit() {
 | 
			
		||||
    MICROPROFILE_SCOPE(GPU_BlitEngine);
 | 
			
		||||
    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
 | 
			
		||||
              regs.src.Address(), regs.dst.Address());
 | 
			
		||||
 | 
			
		||||
@@ -52,9 +61,16 @@ void Fermi2D::Blit() {
 | 
			
		||||
    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
 | 
			
		||||
 | 
			
		||||
    const auto& args = regs.pixels_from_memory;
 | 
			
		||||
    constexpr s64 null_derivate = 1ULL << 32;
 | 
			
		||||
    Surface src = regs.src;
 | 
			
		||||
    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
 | 
			
		||||
    const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
 | 
			
		||||
                                 src.format != regs.dst.format;
 | 
			
		||||
    Config config{
 | 
			
		||||
        .operation = regs.operation,
 | 
			
		||||
        .filter = args.sample_mode.filter,
 | 
			
		||||
        .must_accelerate =
 | 
			
		||||
            args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,
 | 
			
		||||
        .dst_x0 = args.dst_x0,
 | 
			
		||||
        .dst_y0 = args.dst_y0,
 | 
			
		||||
        .dst_x1 = args.dst_x0 + args.dst_width,
 | 
			
		||||
@@ -64,8 +80,7 @@ void Fermi2D::Blit() {
 | 
			
		||||
        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
 | 
			
		||||
        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
 | 
			
		||||
    };
 | 
			
		||||
    Surface src = regs.src;
 | 
			
		||||
    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
 | 
			
		||||
 | 
			
		||||
    const auto need_align_to_pitch =
 | 
			
		||||
        src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&
 | 
			
		||||
        static_cast<s32>(src.width) == config.src_x1 &&
 | 
			
		||||
@@ -78,8 +93,9 @@ void Fermi2D::Blit() {
 | 
			
		||||
        config.src_x1 -= config.src_x0;
 | 
			
		||||
        config.src_x0 = 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
 | 
			
		||||
        UNIMPLEMENTED();
 | 
			
		||||
        sw_blitter->Blit(src, regs.dst, config);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <cstddef>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
#include "common/common_funcs.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
@@ -21,6 +22,10 @@ class RasterizerInterface;
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines {
 | 
			
		||||
 | 
			
		||||
namespace Blitter {
 | 
			
		||||
class SoftwareBlitEngine;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * This Engine is known as G80_2D. Documentation can be found in:
 | 
			
		||||
 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
 | 
			
		||||
@@ -32,7 +37,7 @@ namespace Tegra::Engines {
 | 
			
		||||
 | 
			
		||||
class Fermi2D final : public EngineInterface {
 | 
			
		||||
public:
 | 
			
		||||
    explicit Fermi2D();
 | 
			
		||||
    explicit Fermi2D(MemoryManager& memory_manager_);
 | 
			
		||||
    ~Fermi2D() override;
 | 
			
		||||
 | 
			
		||||
    /// Binds a rasterizer to this engine.
 | 
			
		||||
@@ -286,6 +291,7 @@ public:
 | 
			
		||||
    struct Config {
 | 
			
		||||
        Operation operation;
 | 
			
		||||
        Filter filter;
 | 
			
		||||
        bool must_accelerate;
 | 
			
		||||
        s32 dst_x0;
 | 
			
		||||
        s32 dst_y0;
 | 
			
		||||
        s32 dst_x1;
 | 
			
		||||
@@ -298,6 +304,7 @@ public:
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    VideoCore::RasterizerInterface* rasterizer = nullptr;
 | 
			
		||||
    std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
 | 
			
		||||
 | 
			
		||||
    /// Performs the copy from the source surface to the destination surface as configured in the
 | 
			
		||||
    /// registers.
 | 
			
		||||
 
 | 
			
		||||
@@ -62,7 +62,8 @@ void MaxwellDMA::Launch() {
 | 
			
		||||
 | 
			
		||||
        if (!is_src_pitch && !is_dst_pitch) {
 | 
			
		||||
            // If both the source and the destination are in block layout, assert.
 | 
			
		||||
            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
 | 
			
		||||
            CopyBlockLinearToBlockLinear();
 | 
			
		||||
            ReleaseSemaphore();
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
 | 
			
		||||
    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
 | 
			
		||||
    UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
 | 
			
		||||
 | 
			
		||||
    const bool is_remapping = regs.launch_dma.remap_enable != 0;
 | 
			
		||||
 | 
			
		||||
    // Deswizzle the input and copy it over.
 | 
			
		||||
    const Parameters& src = regs.src_params;
 | 
			
		||||
    const Parameters& dst = regs.dst_params;
 | 
			
		||||
 | 
			
		||||
    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
 | 
			
		||||
    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
 | 
			
		||||
 | 
			
		||||
    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
 | 
			
		||||
 | 
			
		||||
    u32 src_width = src.width;
 | 
			
		||||
    u32 dst_width = dst.width;
 | 
			
		||||
    u32 x_elements = regs.line_length_in;
 | 
			
		||||
    u32 src_x_offset = src.origin.x;
 | 
			
		||||
    u32 dst_x_offset = dst.origin.x;
 | 
			
		||||
    u32 bpp_shift = 0U;
 | 
			
		||||
    if (!is_remapping) {
 | 
			
		||||
        bpp_shift = Common::FoldRight(
 | 
			
		||||
            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
 | 
			
		||||
            src_width, dst_width, x_elements, src_x_offset, dst_x_offset,
 | 
			
		||||
            static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out));
 | 
			
		||||
        src_width >>= bpp_shift;
 | 
			
		||||
        dst_width >>= bpp_shift;
 | 
			
		||||
        x_elements >>= bpp_shift;
 | 
			
		||||
        src_x_offset >>= bpp_shift;
 | 
			
		||||
        dst_x_offset >>= bpp_shift;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const u32 bytes_per_pixel = base_bpp << bpp_shift;
 | 
			
		||||
    const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth,
 | 
			
		||||
                                          src.block_size.height, src.block_size.depth);
 | 
			
		||||
    const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth,
 | 
			
		||||
                                          dst.block_size.height, dst.block_size.depth);
 | 
			
		||||
 | 
			
		||||
    const u32 pitch = x_elements * bytes_per_pixel;
 | 
			
		||||
    const size_t mid_buffer_size = pitch * regs.line_count;
 | 
			
		||||
 | 
			
		||||
    if (read_buffer.size() < src_size) {
 | 
			
		||||
        read_buffer.resize(src_size);
 | 
			
		||||
    }
 | 
			
		||||
    if (write_buffer.size() < dst_size) {
 | 
			
		||||
        write_buffer.resize(dst_size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    intermediate_buffer.resize(mid_buffer_size);
 | 
			
		||||
 | 
			
		||||
    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
 | 
			
		||||
    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
 | 
			
		||||
 | 
			
		||||
    UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height,
 | 
			
		||||
                     src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
 | 
			
		||||
                     src.block_size.height, src.block_size.depth, pitch);
 | 
			
		||||
 | 
			
		||||
    SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
 | 
			
		||||
                   dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
 | 
			
		||||
                   dst.block_size.height, dst.block_size.depth, pitch);
 | 
			
		||||
 | 
			
		||||
    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void MaxwellDMA::ReleaseSemaphore() {
 | 
			
		||||
    const auto type = regs.launch_dma.semaphore_type;
 | 
			
		||||
    const GPUVAddr address = regs.semaphore.address;
 | 
			
		||||
 
 | 
			
		||||
@@ -223,6 +223,8 @@ private:
 | 
			
		||||
 | 
			
		||||
    void CopyPitchToBlockLinear();
 | 
			
		||||
 | 
			
		||||
    void CopyBlockLinearToBlockLinear();
 | 
			
		||||
 | 
			
		||||
    void FastCopyBlockLinearToPitch();
 | 
			
		||||
 | 
			
		||||
    void ReleaseSemaphore();
 | 
			
		||||
@@ -234,6 +236,7 @@ private:
 | 
			
		||||
 | 
			
		||||
    std::vector<u8> read_buffer;
 | 
			
		||||
    std::vector<u8> write_buffer;
 | 
			
		||||
    std::vector<u8> intermediate_buffer;
 | 
			
		||||
 | 
			
		||||
    static constexpr std::size_t NUM_REGS = 0x800;
 | 
			
		||||
    struct Regs {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										238
									
								
								src/video_core/engines/sw_blitter/blitter.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								src/video_core/engines/sw_blitter/blitter.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,238 @@
 | 
			
		||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 | 
			
		||||
// SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <cmath>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "video_core/engines/sw_blitter/blitter.h"
 | 
			
		||||
#include "video_core/engines/sw_blitter/converter.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/surface.h"
 | 
			
		||||
#include "video_core/textures/decoders.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
class MemoryManager;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
using VideoCore::Surface::BytesPerBlock;
 | 
			
		||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines::Blitter {
 | 
			
		||||
 | 
			
		||||
using namespace Texture;
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
constexpr size_t ir_components = 4;
 | 
			
		||||
 | 
			
		||||
/// Scales a tightly packed image with nearest-neighbour sampling.
///
/// @param input      Packed source texels, src_width * src_height * bpp bytes.
/// @param output     Packed destination texels, dst_width * dst_height * bpp bytes.
/// @param src_width  Source width in texels.
/// @param src_height Source height in texels.
/// @param dst_width  Destination width in texels.
/// @param dst_height Destination height in texels.
/// @param bpp        Size of one texel in bytes.
void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height,
                     u32 dst_width, u32 dst_height, size_t bpp) {
    // Nothing to write; also avoids dividing by zero when computing the steps below.
    if (dst_width == 0 || dst_height == 0) {
        return;
    }
    // Source step per destination texel, as 32.32 fixed point.
    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
    size_t src_y = 0;
    for (u32 y = 0; y < dst_height; y++) {
        // Truncate the row coordinate before scaling it by the width; otherwise its fractional
        // part would be multiplied by the width and leak into the column index.
        const size_t src_row = (src_y >> 32) * src_width;
        const size_t dst_row = static_cast<size_t>(y) * dst_width;
        size_t src_x = 0;
        for (u32 x = 0; x < dst_width; x++) {
            const size_t read_from = (src_row + (src_x >> 32)) * bpp;
            const size_t write_to = (dst_row + x) * bpp;

            std::memcpy(&output[write_to], &input[read_from], bpp);
            src_x += dx_du;
        }
        src_y += dy_dv;
    }
}
 | 
			
		||||
 | 
			
		||||
/// Scales an image held in the intermediate representation (ir_components floats per texel)
/// with nearest-neighbour sampling.
///
/// @param input      Packed source texels, src_width * src_height * ir_components floats.
/// @param output     Packed destination texels, dst_width * dst_height * ir_components floats.
/// @param src_width  Source width in texels.
/// @param src_height Source height in texels.
/// @param dst_width  Destination width in texels.
/// @param dst_height Destination height in texels.
void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width,
                         u32 src_height, u32 dst_width, u32 dst_height) {
    // Nothing to write; also avoids dividing by zero when computing the steps below.
    if (dst_width == 0 || dst_height == 0) {
        return;
    }
    // Source step per destination texel, as 32.32 fixed point.
    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
    size_t src_y = 0;
    for (u32 y = 0; y < dst_height; y++) {
        // Truncate the row coordinate before scaling it by the width; otherwise its fractional
        // part would be multiplied by the width and leak into the column index.
        const size_t src_row = (src_y >> 32) * src_width;
        const size_t dst_row = static_cast<size_t>(y) * dst_width;
        size_t src_x = 0;
        for (u32 x = 0; x < dst_width; x++) {
            const size_t read_from = (src_row + (src_x >> 32)) * ir_components;
            const size_t write_to = (dst_row + x) * ir_components;

            std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components);
            src_x += dx_du;
        }
        src_y += dy_dv;
    }
}
 | 
			
		||||
 | 
			
		||||
/// Scales an image held in the intermediate representation (ir_components floats per texel)
/// with bilinear filtering.
///
/// Destination texel (x, y) samples the source at (x * dx_du, y * dy_dv), where the steps map
/// the first and last destination texels onto the first and last source texels.
///
/// @param input      Packed source texels, src_width * src_height * ir_components floats.
/// @param output     Packed destination texels, dst_width * dst_height * ir_components floats.
/// @param src_width  Source width in texels.
/// @param src_height Source height in texels.
/// @param dst_width  Destination width in texels.
/// @param dst_height Destination height in texels.
void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width,
              size_t src_height, size_t dst_width, size_t dst_height) {
    // An empty source has nothing to sample and an empty destination nothing to write.
    if (src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0) {
        return;
    }

    // Blends the four neighbouring texels, first horizontally and then vertically.
    const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0,
                                    std::span<const f32> x0_y1, std::span<const f32> x1_y1,
                                    f32 weight_x, f32 weight_y) {
        std::array<f32, ir_components> result{};
        for (size_t i = 0; i < ir_components; i++) {
            const f32 upper = std::lerp(x0_y0[i], x1_y0[i], weight_x);
            const f32 lower = std::lerp(x0_y1[i], x1_y1[i], weight_x);
            result[i] = std::lerp(upper, lower, weight_y);
        }
        return result;
    };

    // Fetches the texel at whole-number coordinates (in_x, in_y); rows are src_width texels
    // long, so the row index is the one scaled by the width.
    const auto read_src = [&](f32 in_x, f32 in_y) {
        const size_t read_from =
            (static_cast<size_t>(in_y) * src_width + static_cast<size_t>(in_x)) * ir_components;
        return std::span<const f32>(&input[read_from], ir_components);
    };

    const f32 dx_du =
        dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f;
    const f32 dy_dv =
        dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f;

    // Upper neighbours are clamped so float rounding can never step past the last row/column.
    const f32 max_x = static_cast<f32>(src_width - 1);
    const f32 max_y = static_cast<f32>(src_height - 1);

    for (u32 y = 0; y < dst_height; y++) {
        const f32 sample_y = static_cast<f32>(y) * dy_dv;
        const f32 y_low = std::floor(sample_y);
        const f32 y_high = std::min(std::ceil(sample_y), max_y);
        const f32 weight_y = sample_y - y_low;
        for (u32 x = 0; x < dst_width; x++) {
            const f32 sample_x = static_cast<f32>(x) * dx_du;
            const f32 x_low = std::floor(sample_x);
            const f32 x_high = std::min(std::ceil(sample_x), max_x);
            const f32 weight_x = sample_x - x_low;

            auto x0_y0 = read_src(x_low, y_low);
            auto x1_y0 = read_src(x_high, y_low);
            auto x0_y1 = read_src(x_low, y_high);
            auto x1_y1 = read_src(x_high, y_high);

            const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y);

            const size_t write_to = (y * dst_width + x) * ir_components;

            std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components);
        }
    }
}
 | 
			
		||||
 | 
			
		||||
} // namespace
 | 
			
		||||
 | 
			
		||||
/// Private state of SoftwareBlitEngine: scratch buffers kept between calls, plus the factory
/// that hands out format converters.
struct SoftwareBlitEngine::BlitEngineImpl {
    /// Whole-surface staging area, filled from guest memory for the source and then again for
    /// the destination.
    std::vector<u8> tmp_buffer{};
    /// Tightly packed texels of the source rectangle.
    std::vector<u8> src_buffer{};
    /// Tightly packed texels of the destination rectangle.
    std::vector<u8> dst_buffer{};
    /// Source rectangle in the float intermediate representation.
    std::vector<f32> intermediate_src{};
    /// Destination rectangle in the float intermediate representation.
    std::vector<f32> intermediate_dst{};
    /// Looks up the converter for a given render target format.
    ConverterFactory converter_factory{};
};
 | 
			
		||||
 | 
			
		||||
SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
 | 
			
		||||
    : memory_manager{memory_manager_} {
 | 
			
		||||
    impl = std::make_unique<BlitEngineImpl>();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Defaulted in this translation unit, where BlitEngineImpl is a complete type, so that the
// std::unique_ptr member is able to destroy it.
SoftwareBlitEngine::~SoftwareBlitEngine() = default;
 | 
			
		||||
 | 
			
		||||
/// Performs a 2D blit on the CPU.
///
/// The source surface is read from guest memory and the source rectangle is extracted into a
/// tightly packed buffer. That buffer is then passed through unchanged, rescaled, or converted
/// through the float intermediate representation, and the result is merged into a copy of the
/// destination surface which is written back to guest memory.
///
/// @param src    Source surface description.
/// @param dst    Destination surface description.
/// @param config Blit rectangles and filter mode.
/// @return Always true.
bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
                              Fermi2D::Config& config) {
    // Size in bytes of the whole surface as it is laid out in guest memory.
    const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
        if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
            return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
                                 surface.depth, surface.block_height, surface.block_depth);
        }
        // Widen before multiplying so large pitch surfaces do not wrap around in 32 bits.
        return static_cast<size_t>(surface.pitch) * static_cast<size_t>(surface.height);
    };

    // Copies a rectangle between a pitch-linear surface and a tightly packed buffer.
    // unpack == false: surface (input) -> packed buffer (output).
    // unpack == true:  packed buffer (input) -> surface (output).
    const auto process_pitch_linear = [](bool unpack, std::span<const u8> input,
                                         std::span<u8> output, u32 extent_x, u32 extent_y,
                                         u32 pitch, u32 x0, u32 y0, size_t bpp) {
        const size_t base_offset = x0 * bpp;
        const size_t copy_size = extent_x * bpp;
        // Row y of the packed buffer corresponds to row (y0 + y) of the surface, so all
        // extent_y rows are visited regardless of the rectangle's vertical origin.
        for (u32 y = 0; y < extent_y; y++) {
            const size_t surface_offset = (static_cast<size_t>(y) + y0) * pitch + base_offset;
            const size_t packed_offset = y * copy_size;
            u8* write_to = unpack ? &output[surface_offset] : &output[packed_offset];
            const u8* read_from = unpack ? &input[packed_offset] : &input[surface_offset];
            std::memcpy(write_to, read_from, copy_size);
        }
    };

    const u32 src_extent_x = config.src_x1 - config.src_x0;
    const u32 src_extent_y = config.src_y1 - config.src_y0;

    const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
    const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
    const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
    const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
    const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
    impl->tmp_buffer.resize(src_size);
    memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);

    // Sizes of the packed rectangles, computed in size_t to avoid 32-bit wrap-around.
    const size_t src_copy_size =
        static_cast<size_t>(src_extent_x) * src_extent_y * src_bytes_per_pixel;

    const size_t dst_copy_size =
        static_cast<size_t>(dst_extent_x) * dst_extent_y * dst_bytes_per_pixel;

    impl->src_buffer.resize(src_copy_size);

    // A plain copy is only possible when neither the format nor the extents change.
    const bool no_passthrough =
        src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;

    // Same format on both sides: rescale the raw texels directly.
    const auto conversion_phase_same_format = [&]() {
        NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
                        dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
    };

    // Format change or bilinear filtering: go through the float intermediate representation.
    const auto conversion_phase_ir = [&]() {
        auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
        impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components);
        impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components);
        input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);

        if (config.filter != Fermi2D::Filter::Bilinear) {
            NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
                                src_extent_y, dst_extent_x, dst_extent_y);
        } else {
            Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y,
                     dst_extent_x, dst_extent_y);
        }

        auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
        output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
    };

    // Do the actual blit

    // Extraction phase: pull the source rectangle out of the surface into src_buffer.
    impl->dst_buffer.resize(dst_copy_size);
    if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
        UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
                         src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
                         src_extent_y, src.block_height, src.block_depth,
                         src_extent_x * src_bytes_per_pixel);
    } else {
        process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
                             src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
    }

    // Conversion Phase
    if (no_passthrough) {
        if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) {
            conversion_phase_ir();
        } else {
            conversion_phase_same_format();
        }
    } else {
        // Identical format and extents: the extracted texels are already the result.
        impl->dst_buffer.swap(impl->src_buffer);
    }

    // Write-back phase: read the destination surface so texels outside the rectangle are
    // preserved, merge the result into it and write the whole surface back.
    const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
    impl->tmp_buffer.resize(dst_size);
    memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);

    if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
        SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
                       dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
                       dst_extent_y, dst.block_height, dst.block_depth,
                       dst_extent_x * dst_bytes_per_pixel);
    } else {
        process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
                             dst.pitch, config.dst_x0, config.dst_y0,
                             static_cast<size_t>(dst_bytes_per_pixel));
    }
    memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
    return true;
}
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra::Engines::Blitter
 | 
			
		||||
							
								
								
									
										27
									
								
								src/video_core/engines/sw_blitter/blitter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								src/video_core/engines/sw_blitter/blitter.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
			
		||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 | 
			
		||||
// SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include "video_core/engines/fermi_2d.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
class MemoryManager;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines::Blitter {
 | 
			
		||||
 | 
			
		||||
class SoftwareBlitEngine {
 | 
			
		||||
public:
 | 
			
		||||
    explicit SoftwareBlitEngine(MemoryManager& memory_manager_);
 | 
			
		||||
    ~SoftwareBlitEngine();
 | 
			
		||||
 | 
			
		||||
    bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    MemoryManager& memory_manager;
 | 
			
		||||
    struct BlitEngineImpl;
 | 
			
		||||
    std::unique_ptr<BlitEngineImpl> impl;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra::Engines::Blitter
 | 
			
		||||
							
								
								
									
										1234
									
								
								src/video_core/engines/sw_blitter/converter.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1234
									
								
								src/video_core/engines/sw_blitter/converter.cpp
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										36
									
								
								src/video_core/engines/sw_blitter/converter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								src/video_core/engines/sw_blitter/converter.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 | 
			
		||||
// SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <span>
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
 | 
			
		||||
#include "video_core/gpu.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines::Blitter {
 | 
			
		||||
 | 
			
		||||
class Converter {
 | 
			
		||||
public:
 | 
			
		||||
    virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0;
 | 
			
		||||
    virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0;
 | 
			
		||||
    virtual ~Converter() = default;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ConverterFactory {
 | 
			
		||||
public:
 | 
			
		||||
    ConverterFactory();
 | 
			
		||||
    ~ConverterFactory();
 | 
			
		||||
 | 
			
		||||
    Converter* GetFormatConverter(RenderTargetFormat format);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    Converter* BuildConverter(RenderTargetFormat format);
 | 
			
		||||
 | 
			
		||||
    struct ConverterFactoryImpl;
 | 
			
		||||
    std::unique_ptr<ConverterFactoryImpl> impl;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra::Engines::Blitter
 | 
			
		||||
							
								
								
									
										136
									
								
								src/video_core/engines/sw_blitter/generate_converters.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								src/video_core/engines/sw_blitter/generate_converters.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,136 @@
 | 
			
		||||
# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 | 
			
		||||
# SPDX-License-Identifier: GPL-3.0-or-later
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
class Format:
    """One render target format, parsed from its enumerator name.

    A name such as ``A2B10G10R10_UNORM`` is split at underscores. The first
    piece lists the components, each a single letter followed by its bit
    count; the second piece is the component type.

    Attributes:
        name: the format name exactly as given.
        component_type: the second underscore-separated piece (e.g. ``UNORM``).
        num_components: how many letter/bit-count pairs were found.
        sizes: bit count of every component, in the order they appear.
        swizzle: letter of every component, in the order they appear.
    """

    def __init__(self, string_value):
        self.name = string_value
        pieces = string_value.split('_')
        self.component_type = pieces[1]
        # A component is one letter plus its bit count. The letter-only class
        # keeps a digit or underscore from ever being taken as the letter.
        components = re.findall(r"[A-Za-z]\d+", pieces[0])
        self.num_components = len(components)
        self.sizes = [int(component[1:]) for component in components]
        self.swizzle = [component[0] for component in components]

    @staticmethod
    def _brace_list(items):
        """Render ``items`` (strings) as a C++ braced initializer: ``{ a, b }``."""
        return "{ " + ", ".join(items) + " }"

    def build_component_type_array(self):
        """Return the initializer repeating the component type once per component."""
        entry = "ComponentType::" + self.component_type
        return self._brace_list([entry] * self.num_components)

    def build_component_sizes_array(self):
        """Return the initializer listing every component's bit count."""
        return self._brace_list(str(size) for size in self.sizes)

    def build_component_swizzle_array(self):
        """Return the initializer listing every component's swizzle.

        The letter ``X`` is emitted as ``Swizzle::None``; every other letter is
        emitted unchanged.
        """
        return self._brace_list(
            "Swizzle::" + ("None" if letter == "X" else letter)
            for letter in self.swizzle
        )

    def print_declaration(self):
        """Print the C++ ``<name>Traits`` struct for this format to stdout."""
        lines = [
            "struct " + self.name + "Traits {",
            "  static constexpr size_t num_components = " + str(self.num_components) + ";",
            "  static constexpr std::array<ComponentType, num_components> component_types = "
            + self.build_component_type_array() + ";",
            "  static constexpr std::array<size_t, num_components> component_sizes = "
            + self.build_component_sizes_array() + ";",
            "  static constexpr std::array<Swizzle, num_components> component_swizzle = "
            + self.build_component_swizzle_array() + ";",
            # The trailing newline leaves a blank line between consecutive structs.
            "};\n",
        ]
        print("\n".join(lines))

    def print_case(self):
        """Print the C++ ``switch`` case that creates this format's converter."""
        lines = [
            "case RenderTargetFormat::" + self.name + ":",
            "  return impl->converters_cache",
            "    .emplace(format, std::make_unique<ConverterImpl<" + self.name + "Traits>>())",
            "    .first->second.get();",
            # Unreachable after the return, but kept so the generated text
            # stays identical to what this script has always produced.
            "  break;",
        ]
        print("\n".join(lines))
 | 
			
		||||
 | 
			
		||||
# Render target format names to generate code for, one per line. Each name is
# emitted as a `RenderTargetFormat::<name>` case label and a `<name>Traits`
# struct, so the spelling must match the C++ enumerator exactly.
txt = """
R32G32B32A32_FLOAT
R32G32B32A32_SINT
R32G32B32A32_UINT
R32G32B32X32_FLOAT
R32G32B32X32_SINT
R32G32B32X32_UINT
R16G16B16A16_UNORM
R16G16B16A16_SNORM
R16G16B16A16_SINT
R16G16B16A16_UINT
R16G16B16A16_FLOAT
R32G32_FLOAT
R32G32_SINT
R32G32_UINT
R16G16B16X16_FLOAT
A8R8G8B8_UNORM
A8R8G8B8_SRGB
A2B10G10R10_UNORM
A2B10G10R10_UINT
A2R10G10B10_UNORM
A8B8G8R8_UNORM
A8B8G8R8_SRGB
A8B8G8R8_SNORM
A8B8G8R8_SINT
A8B8G8R8_UINT
R16G16_UNORM
R16G16_SNORM
R16G16_SINT
R16G16_UINT
R16G16_FLOAT
B10G11R11_FLOAT
R32_SINT
R32_UINT
R32_FLOAT
X8R8G8B8_UNORM
X8R8G8B8_SRGB
R5G6B5_UNORM
A1R5G5B5_UNORM
R8G8_UNORM
R8G8_SNORM
R8G8_SINT
R8G8_UINT
R16_UNORM
R16_SNORM
R16_SINT
R16_UINT
R16_FLOAT
R8_UNORM
R8_SNORM
R8_SINT
R8_UINT
X1R5G5B5_UNORM
X8B8G8R8_UNORM
X8B8G8R8_SRGB
"""

# Parse every whitespace-separated name once; both passes below reuse the list.
formats = [Format(name) for name in txt.split()]

# Pass 1: all traits structs. `fmt` avoids shadowing the builtin `format`.
for fmt in formats:
    fmt.print_declaration()

# Pass 2: all switch cases, printed after the structs they refer to.
for fmt in formats:
    fmt.print_case()
 | 
			
		||||
@@ -27,12 +27,12 @@ struct CommandList;
 | 
			
		||||
// TODO: Implement the commented ones
 | 
			
		||||
enum class RenderTargetFormat : u32 {
 | 
			
		||||
    NONE = 0x0,
 | 
			
		||||
    R32B32G32A32_FLOAT = 0xC0,
 | 
			
		||||
    R32G32B32A32_FLOAT = 0xC0,
 | 
			
		||||
    R32G32B32A32_SINT = 0xC1,
 | 
			
		||||
    R32G32B32A32_UINT = 0xC2,
 | 
			
		||||
    // R32G32B32X32_FLOAT = 0xC3,
 | 
			
		||||
    // R32G32B32X32_SINT = 0xC4,
 | 
			
		||||
    // R32G32B32X32_UINT = 0xC5,
 | 
			
		||||
    R32G32B32X32_FLOAT = 0xC3,
 | 
			
		||||
    R32G32B32X32_SINT = 0xC4,
 | 
			
		||||
    R32G32B32X32_UINT = 0xC5,
 | 
			
		||||
    R16G16B16A16_UNORM = 0xC6,
 | 
			
		||||
    R16G16B16A16_SNORM = 0xC7,
 | 
			
		||||
    R16G16B16A16_SINT = 0xC8,
 | 
			
		||||
@@ -56,13 +56,13 @@ enum class RenderTargetFormat : u32 {
 | 
			
		||||
    R16G16_SINT = 0xDC,
 | 
			
		||||
    R16G16_UINT = 0xDD,
 | 
			
		||||
    R16G16_FLOAT = 0xDE,
 | 
			
		||||
    // A2R10G10B10_UNORM = 0xDF,
 | 
			
		||||
    A2R10G10B10_UNORM = 0xDF,
 | 
			
		||||
    B10G11R11_FLOAT = 0xE0,
 | 
			
		||||
    R32_SINT = 0xE3,
 | 
			
		||||
    R32_UINT = 0xE4,
 | 
			
		||||
    R32_FLOAT = 0xE5,
 | 
			
		||||
    // X8R8G8B8_UNORM = 0xE6,
 | 
			
		||||
    // X8R8G8B8_SRGB = 0xE7,
 | 
			
		||||
    X8R8G8B8_UNORM = 0xE6,
 | 
			
		||||
    X8R8G8B8_SRGB = 0xE7,
 | 
			
		||||
    R5G6B5_UNORM = 0xE8,
 | 
			
		||||
    A1R5G5B5_UNORM = 0xE9,
 | 
			
		||||
    R8G8_UNORM = 0xEA,
 | 
			
		||||
@@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 {
 | 
			
		||||
    R8_SINT = 0xF5,
 | 
			
		||||
    R8_UINT = 0xF6,
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    A8_UNORM = 0xF7,
 | 
			
		||||
    // A8_UNORM = 0xF7,
 | 
			
		||||
    X1R5G5B5_UNORM = 0xF8,
 | 
			
		||||
    X8B8G8R8_UNORM = 0xF9,
 | 
			
		||||
    X8B8G8R8_SRGB = 0xFA,
 | 
			
		||||
    /*
 | 
			
		||||
    Z1R5G5B5_UNORM = 0xFB,
 | 
			
		||||
    O1R5G5B5_UNORM = 0xFC,
 | 
			
		||||
    Z8R8G8B8_UNORM = 0xFD,
 | 
			
		||||
 
 | 
			
		||||
@@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
 | 
			
		||||
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_Blits);
 | 
			
		||||
    std::scoped_lock lock{texture_cache.mutex};
 | 
			
		||||
    texture_cache.BlitImage(dst, src, copy_config);
 | 
			
		||||
    return true;
 | 
			
		||||
    return texture_cache.BlitImage(dst, src, copy_config);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
 | 
			
		||||
    {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
 | 
			
		||||
    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
 | 
			
		||||
    {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
 | 
			
		||||
    {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2R10G10B10_UNORM
 | 
			
		||||
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
 | 
			
		||||
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1},                 // A5B5G5R1_UNORM
 | 
			
		||||
    {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
 | 
			
		||||
 
 | 
			
		||||
@@ -125,6 +125,7 @@ struct FormatTuple {
 | 
			
		||||
    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1R5G5B5_UNORM
 | 
			
		||||
    {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
 | 
			
		||||
    {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT
 | 
			
		||||
    {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM
 | 
			
		||||
    {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},         // A1B5G5R5_UNORM (flipped with swizzle)
 | 
			
		||||
    {VK_FORMAT_R5G5B5A1_UNORM_PACK16},                     // A5B5G5R1_UNORM (specially swizzled)
 | 
			
		||||
    {VK_FORMAT_R8_UNORM, Attachable | Storage},            // R8_UNORM
 | 
			
		||||
 
 | 
			
		||||
@@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
 | 
			
		||||
                                             const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
 | 
			
		||||
    std::scoped_lock lock{texture_cache.mutex};
 | 
			
		||||
    texture_cache.BlitImage(dst, src, copy_config);
 | 
			
		||||
    return true;
 | 
			
		||||
    return texture_cache.BlitImage(dst, src, copy_config);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
 | 
			
		||||
 
 | 
			
		||||
@@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
 | 
			
		||||
 | 
			
		||||
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
 | 
			
		||||
    switch (format) {
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT:
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT:
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT:
 | 
			
		||||
        return PixelFormat::R32G32B32A32_FLOAT;
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32A32_SINT:
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32X32_SINT:
 | 
			
		||||
        return PixelFormat::R32G32B32A32_SINT;
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32A32_UINT:
 | 
			
		||||
    case Tegra::RenderTargetFormat::R32G32B32X32_UINT:
 | 
			
		||||
        return PixelFormat::R32G32B32A32_UINT;
 | 
			
		||||
    case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:
 | 
			
		||||
        return PixelFormat::R16G16B16A16_UNORM;
 | 
			
		||||
@@ -118,16 +121,22 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
 | 
			
		||||
    case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:
 | 
			
		||||
        return PixelFormat::R16G16B16X16_FLOAT;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A8R8G8B8_UNORM:
 | 
			
		||||
    case Tegra::RenderTargetFormat::X8R8G8B8_UNORM:
 | 
			
		||||
        return PixelFormat::B8G8R8A8_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A8R8G8B8_SRGB:
 | 
			
		||||
    case Tegra::RenderTargetFormat::X8R8G8B8_SRGB:
 | 
			
		||||
        return PixelFormat::B8G8R8A8_SRGB;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:
 | 
			
		||||
        return PixelFormat::A2B10G10R10_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A2B10G10R10_UINT:
 | 
			
		||||
        return PixelFormat::A2B10G10R10_UINT;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A2R10G10B10_UNORM:
 | 
			
		||||
        return PixelFormat::A2R10G10B10_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A8B8G8R8_UNORM:
 | 
			
		||||
    case Tegra::RenderTargetFormat::X8B8G8R8_UNORM:
 | 
			
		||||
        return PixelFormat::A8B8G8R8_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A8B8G8R8_SRGB:
 | 
			
		||||
    case Tegra::RenderTargetFormat::X8B8G8R8_SRGB:
 | 
			
		||||
        return PixelFormat::A8B8G8R8_SRGB;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:
 | 
			
		||||
        return PixelFormat::A8B8G8R8_SNORM;
 | 
			
		||||
@@ -156,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
 | 
			
		||||
    case Tegra::RenderTargetFormat::R5G6B5_UNORM:
 | 
			
		||||
        return PixelFormat::R5G6B5_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::A1R5G5B5_UNORM:
 | 
			
		||||
    case Tegra::RenderTargetFormat::X1R5G5B5_UNORM:
 | 
			
		||||
        return PixelFormat::A1R5G5B5_UNORM;
 | 
			
		||||
    case Tegra::RenderTargetFormat::R8G8_UNORM:
 | 
			
		||||
        return PixelFormat::R8G8_UNORM;
 | 
			
		||||
 
 | 
			
		||||
@@ -23,6 +23,7 @@ enum class PixelFormat {
 | 
			
		||||
    A1R5G5B5_UNORM,
 | 
			
		||||
    A2B10G10R10_UNORM,
 | 
			
		||||
    A2B10G10R10_UINT,
 | 
			
		||||
    A2R10G10B10_UNORM,
 | 
			
		||||
    A1B5G5R5_UNORM,
 | 
			
		||||
    A5B5G5R1_UNORM,
 | 
			
		||||
    R8_UNORM,
 | 
			
		||||
@@ -159,6 +160,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
 | 
			
		||||
    1,  // A1R5G5B5_UNORM
 | 
			
		||||
    1,  // A2B10G10R10_UNORM
 | 
			
		||||
    1,  // A2B10G10R10_UINT
 | 
			
		||||
    1,  // A2R10G10B10_UNORM
 | 
			
		||||
    1,  // A1B5G5R5_UNORM
 | 
			
		||||
    1,  // A5B5G5R1_UNORM
 | 
			
		||||
    1,  // R8_UNORM
 | 
			
		||||
@@ -264,6 +266,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
 | 
			
		||||
    1,  // A1R5G5B5_UNORM
 | 
			
		||||
    1,  // A2B10G10R10_UNORM
 | 
			
		||||
    1,  // A2B10G10R10_UINT
 | 
			
		||||
    1,  // A2R10G10B10_UNORM
 | 
			
		||||
    1,  // A1B5G5R5_UNORM
 | 
			
		||||
    1,  // A5B5G5R1_UNORM
 | 
			
		||||
    1,  // R8_UNORM
 | 
			
		||||
@@ -369,6 +372,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
 | 
			
		||||
    16,  // A1R5G5B5_UNORM
 | 
			
		||||
    32,  // A2B10G10R10_UNORM
 | 
			
		||||
    32,  // A2B10G10R10_UINT
 | 
			
		||||
    32,  // A2R10G10B10_UNORM
 | 
			
		||||
    16,  // A1B5G5R5_UNORM
 | 
			
		||||
    16,  // A5B5G5R1_UNORM
 | 
			
		||||
    8,   // R8_UNORM
 | 
			
		||||
 
 | 
			
		||||
@@ -35,6 +35,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
 | 
			
		||||
                return "A2B10G10R10_UNORM";
 | 
			
		||||
            case PixelFormat::A2B10G10R10_UINT:
 | 
			
		||||
                return "A2B10G10R10_UINT";
 | 
			
		||||
            case PixelFormat::A2R10G10B10_UNORM:
 | 
			
		||||
                return "A2R10G10B10_UNORM";
 | 
			
		||||
            case PixelFormat::A1B5G5R5_UNORM:
 | 
			
		||||
                return "A1B5G5R5_UNORM";
 | 
			
		||||
            case PixelFormat::A5B5G5R1_UNORM:
 | 
			
		||||
 
 | 
			
		||||
@@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
                                const Tegra::Engines::Fermi2D::Surface& src,
 | 
			
		||||
                                const Tegra::Engines::Fermi2D::Config& copy) {
 | 
			
		||||
    const BlitImages images = GetBlitImages(dst, src, copy);
 | 
			
		||||
    const auto result = GetBlitImages(dst, src, copy);
 | 
			
		||||
    if (!result) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    const BlitImages images = *result;
 | 
			
		||||
    const ImageId dst_id = images.dst_id;
 | 
			
		||||
    const ImageId src_id = images.src_id;
 | 
			
		||||
 | 
			
		||||
@@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
 | 
			
		||||
                          copy.operation);
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
@@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
 | 
			
		||||
std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages(
 | 
			
		||||
    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
 | 
			
		||||
    const Tegra::Engines::Fermi2D::Config& copy) {
 | 
			
		||||
 | 
			
		||||
@@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
 | 
			
		||||
        has_deleted_images = false;
 | 
			
		||||
        src_id = FindImage(src_info, src_addr, try_options);
 | 
			
		||||
        dst_id = FindImage(dst_info, dst_addr, try_options);
 | 
			
		||||
        if (!copy.must_accelerate) {
 | 
			
		||||
            do {
 | 
			
		||||
                if (!src_id && !dst_id) {
 | 
			
		||||
                    return std::nullopt;
 | 
			
		||||
                }
 | 
			
		||||
                if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
                if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
                return std::nullopt;
 | 
			
		||||
            } while (false);
 | 
			
		||||
        }
 | 
			
		||||
        const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
 | 
			
		||||
        if (src_image && src_image->info.num_samples > 1) {
 | 
			
		||||
            RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews};
 | 
			
		||||
@@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
 | 
			
		||||
            dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});
 | 
			
		||||
        } while (has_deleted_images);
 | 
			
		||||
    }
 | 
			
		||||
    return BlitImages{
 | 
			
		||||
    return {BlitImages{
 | 
			
		||||
        .dst_id = dst_id,
 | 
			
		||||
        .src_id = src_id,
 | 
			
		||||
        .dst_format = dst_info.format,
 | 
			
		||||
        .src_format = src_info.format,
 | 
			
		||||
    };
 | 
			
		||||
    }};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class P>
 | 
			
		||||
 
 | 
			
		||||
@@ -174,7 +174,7 @@ public:
 | 
			
		||||
    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
 | 
			
		||||
 | 
			
		||||
    /// Blit an image with the given parameters
 | 
			
		||||
    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
    bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
                   const Tegra::Engines::Fermi2D::Surface& src,
 | 
			
		||||
                   const Tegra::Engines::Fermi2D::Config& copy);
 | 
			
		||||
 | 
			
		||||
@@ -285,9 +285,9 @@ private:
 | 
			
		||||
    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
 | 
			
		||||
 | 
			
		||||
    /// Return a blit image pair from the given guest blit parameters
 | 
			
		||||
    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
 | 
			
		||||
                                           const Tegra::Engines::Fermi2D::Surface& src,
 | 
			
		||||
                                           const Tegra::Engines::Fermi2D::Config& copy);
 | 
			
		||||
    [[nodiscard]] std::optional<BlitImages> GetBlitImages(
 | 
			
		||||
        const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
 | 
			
		||||
        const Tegra::Engines::Fermi2D::Config& copy);
 | 
			
		||||
 | 
			
		||||
    /// Find or create a sampler from a guest descriptor sampler
 | 
			
		||||
    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user