2023-03-27 13:29:17 +02:00
|
|
|
// Copyright 2015 Citra Emulator Project
|
|
|
|
// Licensed under GPLv2 or any later version
|
|
|
|
// Refer to the license.txt file included.
|
|
|
|
|
2023-06-24 00:59:18 +02:00
|
|
|
#include <boost/container/static_vector.hpp>
|
|
|
|
#include "common/logging/log.h"
|
|
|
|
#include "common/microprofile.h"
|
|
|
|
#include "common/quaternion.h"
|
|
|
|
#include "common/vector_math.h"
|
|
|
|
#include "core/memory.h"
|
|
|
|
#include "video_core/pica_state.h"
|
|
|
|
#include "video_core/pica_types.h"
|
|
|
|
#include "video_core/renderer_software/sw_framebuffer.h"
|
|
|
|
#include "video_core/renderer_software/sw_lighting.h"
|
|
|
|
#include "video_core/renderer_software/sw_proctex.h"
|
2023-03-27 13:29:17 +02:00
|
|
|
#include "video_core/renderer_software/sw_rasterizer.h"
|
2023-06-24 00:59:18 +02:00
|
|
|
#include "video_core/renderer_software/sw_texturing.h"
|
|
|
|
#include "video_core/shader/shader.h"
|
|
|
|
#include "video_core/texture/texture_decode.h"
|
2023-03-27 13:29:17 +02:00
|
|
|
|
2023-06-24 00:59:18 +02:00
|
|
|
namespace SwRenderer {
|
|
|
|
|
|
|
|
using Pica::f24;
|
|
|
|
using Pica::FramebufferRegs;
|
|
|
|
using Pica::RasterizerRegs;
|
|
|
|
using Pica::TexturingRegs;
|
|
|
|
using Pica::Texture::LookupTexture;
|
|
|
|
using Pica::Texture::TextureInfo;
|
|
|
|
|
2023-09-10 23:29:55 +02:00
|
|
|
// Certain games render 2D elements very close to clip plane 0 resulting in very tiny
|
|
|
|
// negative/positive z values when computing with f32 precision,
|
|
|
|
// causing some vertices to get erroneously clipped. To workaround this problem,
|
|
|
|
// we can use a very small epsilon value for clip plane comparison.
|
|
|
|
constexpr f32 EPSILON_Z = 0.00000001f;
|
|
|
|
|
2023-06-24 00:59:18 +02:00
|
|
|
struct Vertex : Pica::Shader::OutputVertex {
|
|
|
|
Vertex(const OutputVertex& v) : OutputVertex(v) {}
|
|
|
|
|
|
|
|
/// Attributes used to store intermediate results position after perspective divide.
|
|
|
|
Common::Vec3<f24> screenpos;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Linear interpolation
|
|
|
|
* factor: 0=this, 1=vtx
|
|
|
|
* Note: This function cannot be called after perspective divide.
|
|
|
|
**/
|
|
|
|
void Lerp(f24 factor, const Vertex& vtx) {
|
|
|
|
pos = pos * factor + vtx.pos * (f24::One() - factor);
|
|
|
|
quat = quat * factor + vtx.quat * (f24::One() - factor);
|
|
|
|
color = color * factor + vtx.color * (f24::One() - factor);
|
|
|
|
tc0 = tc0 * factor + vtx.tc0 * (f24::One() - factor);
|
|
|
|
tc1 = tc1 * factor + vtx.tc1 * (f24::One() - factor);
|
|
|
|
tc0_w = tc0_w * factor + vtx.tc0_w * (f24::One() - factor);
|
|
|
|
view = view * factor + vtx.view * (f24::One() - factor);
|
|
|
|
tc2 = tc2 * factor + vtx.tc2 * (f24::One() - factor);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Linear interpolation
|
|
|
|
* factor: 0=v0, 1=v1
|
|
|
|
* Note: This function cannot be called after perspective divide.
|
|
|
|
**/
|
|
|
|
static Vertex Lerp(f24 factor, const Vertex& v0, const Vertex& v1) {
|
|
|
|
Vertex ret = v0;
|
|
|
|
ret.Lerp(factor, v1);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
|
|
|
|
|
|
|
|
struct ClippingEdge {
|
|
|
|
public:
|
|
|
|
constexpr ClippingEdge(Common::Vec4<f24> coeffs,
|
|
|
|
Common::Vec4<f24> bias = Common::Vec4<f24>(f24::Zero(), f24::Zero(),
|
|
|
|
f24::Zero(), f24::Zero()))
|
|
|
|
: pos(f24::Zero()), coeffs(coeffs), bias(bias) {}
|
|
|
|
|
|
|
|
bool IsInside(const Vertex& vertex) const {
|
2023-09-10 23:29:55 +02:00
|
|
|
return Common::Dot(vertex.pos + bias, coeffs) >= f24::FromFloat32(-EPSILON_Z);
|
2023-06-24 00:59:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool IsOutSide(const Vertex& vertex) const {
|
|
|
|
return !IsInside(vertex);
|
|
|
|
}
|
|
|
|
|
|
|
|
Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {
|
|
|
|
const f24 dp = Common::Dot(v0.pos + bias, coeffs);
|
|
|
|
const f24 dp_prev = Common::Dot(v1.pos + bias, coeffs);
|
|
|
|
const f24 factor = dp_prev / (dp_prev - dp);
|
|
|
|
return Vertex::Lerp(factor, v0, v1);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
[[maybe_unused]] f24 pos;
|
|
|
|
Common::Vec4<f24> coeffs;
|
|
|
|
Common::Vec4<f24> bias;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // Anonymous namespace
|
|
|
|
|
|
|
|
// Constructs the software rasterizer. Spawns a worker pool with one thread per
// hardware thread (at least two); ProcessTriangle dispatches one scanline per task.
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
    : memory{memory_}, state{Pica::g_state}, regs{state.regs},
      num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)},
      sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {}
|
2023-03-27 13:29:17 +02:00
|
|
|
|
|
|
|
/**
 * Clips the input triangle against the fixed clip volume (and, when enabled, the
 * custom clip plane), performs the perspective divide on the surviving vertices and
 * rasterizes the resulting polygon as a triangle fan.
 */
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
                                     const Pica::Shader::OutputVertex& v1,
                                     const Pica::Shader::OutputVertex& v2) {
    /**
     * Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at
     * the new edge (or less in degenerate cases). As such, we can say that each clipping plane
     * introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
     * fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
     **/
    static constexpr std::size_t MAX_VERTICES = 9;

    // Double-buffered vertex lists; each clip pass reads one and writes the other.
    boost::container::static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
    boost::container::static_vector<Vertex, MAX_VERTICES> buffer_b;

    // Flip the v1/v2 quaternions into v0's hemisphere so interpolation between them
    // behaves (q and -q represent the same rotation but lerp differently).
    FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat);
    FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat);

    auto* output_list = &buffer_a;
    auto* input_list = &buffer_b;

    // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
    // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
    // epsilon possible within f24 accuracy.
    static constexpr f24 EPSILON = f24::FromFloat32(0.00001f);
    static constexpr f24 f0 = f24::Zero();
    static constexpr f24 f1 = f24::One();
    static constexpr std::array<ClippingEdge, 7> clipping_edges = {{
        {Common::MakeVec(-f1, f0, f0, f1)},                                        // x = +w
        {Common::MakeVec(f1, f0, f0, f1)},                                         // x = -w
        {Common::MakeVec(f0, -f1, f0, f1)},                                        // y = +w
        {Common::MakeVec(f0, f1, f0, f1)},                                         // y = -w
        {Common::MakeVec(f0, f0, -f1, f0)},                                        // z = 0
        {Common::MakeVec(f0, f0, f1, f1)},                                         // z = -w
        {Common::MakeVec(f0, f0, f0, f1), Common::Vec4<f24>(f0, f0, f0, EPSILON)}, // w = EPSILON
    }};

    // Simple implementation of the Sutherland-Hodgman clipping algorithm.
    // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
    const auto clip = [&](const ClippingEdge& edge) {
        std::swap(input_list, output_list);
        output_list->clear();

        // Walk every edge (reference_vertex -> vertex) of the polygon, emitting
        // kept vertices and plane intersections.
        const Vertex* reference_vertex = &input_list->back();
        for (const auto& vertex : *input_list) {
            // NOTE: This algorithm changes vertex order in some cases!
            if (edge.IsInside(vertex)) {
                if (edge.IsOutSide(*reference_vertex)) {
                    output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
                }
                output_list->push_back(vertex);
            } else if (edge.IsInside(*reference_vertex)) {
                output_list->push_back(edge.GetIntersection(vertex, *reference_vertex));
            }
            reference_vertex = &vertex;
        }
    };

    for (const ClippingEdge& edge : clipping_edges) {
        clip(edge);
        // Fewer than 3 vertices left means the triangle was clipped away entirely.
        if (output_list->size() < 3) {
            return;
        }
    }

    // Optional application-controlled clip plane (PICA clip coefficient registers).
    if (state.regs.rasterizer.clip_enable) {
        const ClippingEdge custom_edge{state.regs.rasterizer.GetClipCoef()};
        clip(custom_edge);
        if (output_list->size() < 3) {
            return;
        }
    }

    MakeScreenCoords((*output_list)[0]);
    MakeScreenCoords((*output_list)[1]);

    // Fan the clipped polygon around vertex 0; vtx2 of each fan triangle is the only
    // vertex that has not been perspective-divided yet.
    for (std::size_t i = 0; i < output_list->size() - 2; i++) {
        Vertex& vtx0 = (*output_list)[0];
        Vertex& vtx1 = (*output_list)[i + 1];
        Vertex& vtx2 = (*output_list)[i + 2];

        MakeScreenCoords(vtx2);

        LOG_TRACE(
            Render_Software,
            "Triangle {}/{} at position ({:.3}, {:.3}, {:.3}, {:.3f}), "
            "({:.3}, {:.3}, {:.3}, {:.3}), ({:.3}, {:.3}, {:.3}, {:.3}) and "
            "screen position ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2}), ({:.2}, {:.2}, {:.2})",
            i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(),
            vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(),
            vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
            vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(),
            vtx2.pos.w.ToFloat32(), vtx0.screenpos.x.ToFloat32(), vtx0.screenpos.y.ToFloat32(),
            vtx0.screenpos.z.ToFloat32(), vtx1.screenpos.x.ToFloat32(),
            vtx1.screenpos.y.ToFloat32(), vtx1.screenpos.z.ToFloat32(),
            vtx2.screenpos.x.ToFloat32(), vtx2.screenpos.y.ToFloat32(),
            vtx2.screenpos.z.ToFloat32());

        ProcessTriangle(vtx0, vtx1, vtx2);
    }
}
|
|
|
|
|
|
|
|
/**
 * Performs the perspective divide on a clipped vertex and maps the result into
 * screen space via the viewport registers. After this call, vtx.pos.w holds 1/w
 * and all interpolated attributes have been pre-divided by w (required for the
 * perspective-correct interpolation done during rasterization).
 */
void RasterizerSoftware::MakeScreenCoords(Vertex& vtx) {
    // Viewport transform parameters, straight from the rasterizer registers.
    const f24 viewport_half_x = f24::FromRaw(regs.rasterizer.viewport_size_x);
    const f24 viewport_half_y = f24::FromRaw(regs.rasterizer.viewport_size_y);
    const f24 viewport_corner_x =
        f24::FromFloat32(static_cast<f32>(regs.rasterizer.viewport_corner.x));
    const f24 viewport_corner_y =
        f24::FromFloat32(static_cast<f32>(regs.rasterizer.viewport_corner.y));

    // Pre-divide every attribute by w; store 1/w in place of w itself.
    const f24 w_inverse = f24::One() / vtx.pos.w;
    vtx.pos.w = w_inverse;
    vtx.quat *= w_inverse;
    vtx.color *= w_inverse;
    vtx.tc0 *= w_inverse;
    vtx.tc1 *= w_inverse;
    vtx.tc0_w *= w_inverse;
    vtx.view *= w_inverse;
    vtx.tc2 *= w_inverse;

    // Map normalized device coordinates [-1, 1] into the viewport rectangle.
    vtx.screenpos[0] =
        (vtx.pos.x * w_inverse + f24::One()) * viewport_half_x + viewport_corner_x;
    vtx.screenpos[1] =
        (vtx.pos.y * w_inverse + f24::One()) * viewport_half_y + viewport_corner_y;
    vtx.screenpos[2] = vtx.pos.z * w_inverse;
}
|
|
|
|
|
|
|
|
/**
 * Rasterizes one screen-space triangle. Winding is normalized/culled per the cull
 * mode, the bounding box is scissored, then every scanline is queued as a task on
 * the worker pool; the function blocks until all scanlines have been shaded.
 * @param reversed true when the caller already swapped v1/v2 to obtain CCW order
 *        (prevents infinite recursion when re-winding).
 */
void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2,
                                         bool reversed) {
    MICROPROFILE_SCOPE(GPU_Rasterization);

    // Vertex positions in rasterizer coordinates (12.4 fixed point).
    static auto screen_to_rasterizer_coords = [](const Common::Vec3<f24>& vec) {
        return Common::Vec3{Fix12P4::FromFloat24(vec.x), Fix12P4::FromFloat24(vec.y),
                            Fix12P4::FromFloat24(vec.z)};
    };

    const std::array<Common::Vec3<Fix12P4>, 3> vtxpos = {
        screen_to_rasterizer_coords(v0.screenpos),
        screen_to_rasterizer_coords(v1.screenpos),
        screen_to_rasterizer_coords(v2.screenpos),
    };

    if (regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepAll) {
        // Make sure we always end up with a triangle wound counter-clockwise
        if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
            ProcessTriangle(v0, v2, v1, true);
            return;
        }
    } else {
        if (!reversed && regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepClockWise) {
            // Reverse vertex order and use the CCW code path.
            ProcessTriangle(v0, v2, v1, true);
            return;
        }
        // Cull away triangles which are wound clockwise.
        if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
            return;
        }
    }

    // Bounding box of the triangle, still in 12.4 fixed point.
    u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
    u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
    u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});

    // Convert the scissor box coordinates to 12.4 fixed point
    const u16 scissor_x1 = static_cast<u16>(regs.rasterizer.scissor_test.x1 << 4);
    const u16 scissor_y1 = static_cast<u16>(regs.rasterizer.scissor_test.y1 << 4);
    // x2,y2 have +1 added to cover the entire sub-pixel area
    const u16 scissor_x2 = static_cast<u16>((regs.rasterizer.scissor_test.x2 + 1) << 4);
    const u16 scissor_y2 = static_cast<u16>((regs.rasterizer.scissor_test.y2 + 1) << 4);

    if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Include) {
        // Calculate the new bounds
        min_x = std::max(min_x, scissor_x1);
        min_y = std::max(min_y, scissor_y1);
        max_x = std::min(max_x, scissor_x2);
        max_y = std::min(max_y, scissor_y2);
    }

    // Snap the bounds outward to whole-pixel boundaries.
    min_x &= Fix12P4::IntMask();
    min_y &= Fix12P4::IntMask();
    max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
    max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask());

    // Top-left fill rule: bias right-side/flat-bottom edges by -1 so shared edges
    // are not rasterized twice by adjacent triangles.
    const int bias0 =
        IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0;
    const int bias1 =
        IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0;
    const int bias2 =
        IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0;

    // Per-vertex 1/w (MakeScreenCoords stored 1/w into pos.w).
    const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);

    const auto textures = regs.texturing.GetTextures();
    const auto tev_stages = regs.texturing.GetTevStages();

    fb.Bind();

    // Enter rasterization loop, starting at the center of the topleft bounding box corner.
    // TODO: Not sure if looping through x first might be faster
    // (+8 selects the pixel center and 0x10 is one pixel in 12.4 fixed point)
    for (u16 y = min_y + 8; y < max_y; y += 0x10) {
        // Each scanline becomes one task on the worker pool. Everything it reads
        // (registers, vertices, framebuffer binding) is captured by reference.
        const auto process_scanline = [&, y] {
            for (u16 x = min_x + 8; x < max_x; x += 0x10) {
                // Do not process the pixel if it's inside the scissor box and the scissor mode is
                // set to Exclude.
                if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
                    if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
                        continue;
                    }
                }

                // Calculate the barycentric coordinates w0, w1 and w2
                const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
                const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
                const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y});
                const s32 wsum = w0 + w1 + w2;

                // If current pixel is not covered by the current primitive
                if (w0 < 0 || w1 < 0 || w2 < 0) {
                    continue;
                }

                const auto baricentric_coordinates = Common::MakeVec(
                    f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)),
                    f24::FromFloat32(static_cast<f32>(w2)));
                const f24 interpolated_w_inverse =
                    f24::One() / Common::Dot(w_inverse, baricentric_coordinates);

                // interpolated_z = z / w
                const float interpolated_z_over_w =
                    (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 +
                     v2.screenpos[2].ToFloat32() * w2) /
                    wsum;

                // Not fully accurate. About 3 bits in precision are missing.
                // Z-Buffer (z / w * scale + offset)
                const float depth_scale =
                    f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
                const float depth_offset =
                    f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
                float depth = interpolated_z_over_w * depth_scale + depth_offset;

                // Potentially switch to W-Buffer
                if (regs.rasterizer.depthmap_enable ==
                    Pica::RasterizerRegs::DepthBuffering::WBuffering) {
                    // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
                    depth *= interpolated_w_inverse.ToFloat32() * wsum;
                }

                // Clamp the result
                depth = std::clamp(depth, 0.0f, 1.0f);

                /**
                 * Perspective correct attribute interpolation:
                 * Attribute values cannot be calculated by simple linear interpolation since
                 * they are not linear in screen space. For example, when interpolating a
                 * texture coordinate across two vertices, something simple like
                 * u = (u0*w0 + u1*w1)/(w0+w1)
                 * will not work. However, the attribute value divided by the
                 * clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear
                 * in screenspace. Hence, we can linearly interpolate these two independently and
                 * calculate the interpolated attribute by dividing the results.
                 * I.e.
                 * u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1)
                 * one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
                 * u = u_over_w / one_over_w
                 *
                 * The generalization to three vertices is straightforward in baricentric
                 * coordinates. (The attributes were pre-divided by w in MakeScreenCoords.)
                 **/
                const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
                    auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
                    f24 interpolated_attr_over_w =
                        Common::Dot(attr_over_w, baricentric_coordinates);
                    return interpolated_attr_over_w * interpolated_w_inverse;
                };

                // Interpolated vertex color, converted from [0,1] floats to 8-bit channels.
                const Common::Vec4<u8> primary_color{
                    static_cast<u8>(
                        round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r())
                                  .ToFloat32() *
                              255)),
                    static_cast<u8>(
                        round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g())
                                  .ToFloat32() *
                              255)),
                    static_cast<u8>(
                        round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b())
                                  .ToFloat32() *
                              255)),
                    static_cast<u8>(
                        round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a())
                                  .ToFloat32() *
                              255)),
                };

                // Interpolated texture coordinates for the three texture units.
                std::array<Common::Vec2<f24>, 3> uv;
                uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
                uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
                uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
                uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
                uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
                uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());

                // Sample bound texture units.
                const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
                const auto texture_color = TextureColor(uv, textures, tc0_w);

                Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
                Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};

                // Fragment lighting: interpolate the tangent-space quaternion and view
                // vector, then evaluate the lighting pipeline.
                if (!regs.lighting.disable) {
                    const auto normquat =
                        Common::Quaternion<f32>{
                            {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x)
                                 .ToFloat32(),
                             get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y)
                                 .ToFloat32(),
                             get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z)
                                 .ToFloat32()},
                            get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
                        }
                            .Normalized();

                    const Common::Vec3f view{
                        get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(),
                        get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
                        get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
                    };
                    std::tie(primary_fragment_color, secondary_fragment_color) =
                        ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view,
                                               texture_color);
                }

                // Write the TEV stages.
                auto combiner_output =
                    WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
                                   secondary_fragment_color);

                const auto& output_merger = regs.framebuffer.output_merger;
                if (output_merger.fragment_operation_mode ==
                    FramebufferRegs::FragmentOperationMode::Shadow) {
                    const u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
                    // Use green color as the shadow intensity
                    const u8 stencil = combiner_output.y;
                    fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
                    // Skip the normal output merger pipeline if it is in shadow mode
                    continue;
                }

                // Does alpha testing happen before or after stencil?
                if (!DoAlphaTest(combiner_output.a())) {
                    continue;
                }
                WriteFog(depth, combiner_output);
                if (!DoDepthStencilTest(x, y, depth)) {
                    continue;
                }
                const auto result = PixelColor(x, y, combiner_output);
                if (regs.framebuffer.framebuffer.allow_color_write != 0) {
                    // x/y are 12.4 fixed point; >> 4 yields the integer pixel position.
                    fb.DrawPixel(x >> 4, y >> 4, result);
                }
            }
        };
        sw_workers.QueueWork(std::move(process_scanline));
    }
    // Block until every queued scanline task has finished before returning.
    sw_workers.WaitForRequests();
}
|
|
|
|
|
|
|
|
/**
 * Samples the three texture units plus the procedural texture unit.
 * @param uv Interpolated UV coordinates for texture units 0-2.
 * @param textures Per-unit configuration from the texturing registers.
 * @param tc0_w Interpolated third texture coordinate of unit 0 (used by the
 *        projection, shadow and cube paths).
 * @return RGBA8 color for each of the 4 units (unsampled units remain zero).
 */
std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
    std::span<const Common::Vec2<f24>, 3> uv,
    std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const {
    std::array<Common::Vec4<u8>, 4> texture_color{};
    for (u32 i = 0; i < 3; ++i) {
        const auto& texture = textures[i];
        if (!texture.enabled) [[unlikely]] {
            continue;
        }
        // A zero texture address would read from unmapped memory; substitute opaque black.
        if (texture.config.address == 0) [[unlikely]] {
            texture_color[i] = {0, 0, 0, 255};
            continue;
        }

        // Unit 2 can be configured to reuse unit 1's coordinates.
        const s32 coordinate_i = (i == 2 && regs.texturing.main_config.texture2_use_coord1) ? 1 : i;
        f24 u = uv[coordinate_i].u();
        f24 v = uv[coordinate_i].v();

        // Only unit 0 respects the texturing type (according to 3DBrew)
        PAddr texture_address = texture.config.GetPhysicalAddress();
        // NOTE(review): shadow_z is only assigned on the ShadowCube/Shadow2D paths; it is
        // only read below under the same type check, so the plain 2D path never uses it.
        f24 shadow_z;
        if (i == 0) {
            switch (texture.config.type) {
            case TexturingRegs::TextureConfig::Texture2D:
                break;
            case TexturingRegs::TextureConfig::ShadowCube:
            case TexturingRegs::TextureConfig::TextureCube: {
                // Selects the cube face and remaps u/v/address accordingly.
                std::tie(u, v, shadow_z, texture_address) =
                    ConvertCubeCoord(u, v, tc0_w, regs.texturing);
                break;
            }
            case TexturingRegs::TextureConfig::Projection2D: {
                u /= tc0_w;
                v /= tc0_w;
                break;
            }
            case TexturingRegs::TextureConfig::Shadow2D: {
                if (!regs.texturing.shadow.orthographic) {
                    u /= tc0_w;
                    v /= tc0_w;
                }
                shadow_z = f24::FromFloat32(std::abs(tc0_w.ToFloat32()));
                break;
            }
            case TexturingRegs::TextureConfig::Disabled:
                continue; // skip this unit and continue to the next unit
            default:
                LOG_ERROR(HW_GPU, "Unhandled texture type {:x}", (int)texture.config.type);
                UNIMPLEMENTED();
                break;
            }
        }

        // Scale normalized coordinates to texel space.
        const f24 width = f24::FromFloat32(static_cast<f32>(texture.config.width));
        const f24 height = f24::FromFloat32(static_cast<f32>(texture.config.height));
        s32 s = static_cast<s32>((u * width).ToFloat32());
        s32 t = static_cast<s32>((v * height).ToFloat32());

        bool use_border_s = false;
        bool use_border_t = false;

        // ClampToBorder uses the border color outside [0, size); ClampToBorder2
        // only borders past the far edge.
        if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) {
            use_border_s = s < 0 || s >= static_cast<s32>(texture.config.width);
        } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) {
            use_border_s = s >= static_cast<s32>(texture.config.width);
        }

        if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) {
            use_border_t = t < 0 || t >= static_cast<s32>(texture.config.height);
        } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) {
            use_border_t = t >= static_cast<s32>(texture.config.height);
        }

        if (use_border_s || use_border_t) {
            const auto border_color = texture.config.border_color;
            texture_color[i] = Common::MakeVec(border_color.r.Value(), border_color.g.Value(),
                                               border_color.b.Value(), border_color.a.Value())
                                   .Cast<u8>();
        } else {
            // Textures are laid out from bottom to top, hence we invert the t coordinate.
            // NOTE: This may not be the right place for the inversion.
            // TODO: Check if this applies to ETC textures, too.
            s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
            t = texture.config.height - 1 -
                GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);

            const u8* texture_data = memory.GetPhysicalPointer(texture_address);
            const auto info = TextureInfo::FromPicaRegister(texture.config, texture.format);

            // TODO: Apply the min and mag filters to the texture
            texture_color[i] = LookupTexture(texture_data, s, t, info);
        }

        // Shadow depth compare: replace the sampled color with the shadow density.
        if (i == 0 && (texture.config.type == TexturingRegs::TextureConfig::Shadow2D ||
                       texture.config.type == TexturingRegs::TextureConfig::ShadowCube)) {

            s32 z_int = static_cast<s32>(std::min(shadow_z.ToFloat32(), 1.0f) * 0xFFFFFF);
            z_int -= regs.texturing.shadow.bias << 1;
            const auto& color = texture_color[i];
            // The stored shadow reference depth is packed into the a/b/g channels.
            const s32 z_ref = (color.w << 16) | (color.z << 8) | color.y;
            u8 density;
            if (z_ref >= z_int) {
                density = color.x;
            } else {
                density = 0;
            }
            texture_color[i] = {density, density, density, density};
        }
    }

    // Sample procedural texture
    if (regs.texturing.main_config.texture3_enable) {
        const auto& proctex_uv = uv[regs.texturing.main_config.texture3_coordinates];
        texture_color[3] = ProcTex(proctex_uv.u().ToFloat32(), proctex_uv.v().ToFloat32(),
                                   regs.texturing, state.proctex);
    }

    return texture_color;
}
|
|
|
|
|
|
|
|
/**
 * Applies the output-merger stage (alpha blending or logic op) to the TEV combiner
 * output for the pixel at (x, y) — both in 12.4 fixed point — and returns the final
 * color, respecting the per-channel color write enables.
 */
Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
                                                Common::Vec4<u8> combiner_output) const {
    // Current framebuffer color at this pixel (the blend destination).
    const auto dest = fb.GetPixel(x >> 4, y >> 4);
    Common::Vec4<u8> blend_output = combiner_output;

    const auto& output_merger = regs.framebuffer.output_merger;
    if (output_merger.alphablend_enable) {
        const auto params = output_merger.alpha_blending;
        // Maps a blend factor enum to its 8-bit weight for the given channel (0-2 = rgb, 3 = a).
        const auto lookup_factor = [&](u32 channel, FramebufferRegs::BlendFactor factor) -> u8 {
            DEBUG_ASSERT(channel < 4);

            const Common::Vec4<u8> blend_const =
                Common::MakeVec(
                    output_merger.blend_const.r.Value(), output_merger.blend_const.g.Value(),
                    output_merger.blend_const.b.Value(), output_merger.blend_const.a.Value())
                    .Cast<u8>();

            switch (factor) {
            case FramebufferRegs::BlendFactor::Zero:
                return 0;
            case FramebufferRegs::BlendFactor::One:
                return 255;
            case FramebufferRegs::BlendFactor::SourceColor:
                return combiner_output[channel];
            case FramebufferRegs::BlendFactor::OneMinusSourceColor:
                return 255 - combiner_output[channel];
            case FramebufferRegs::BlendFactor::DestColor:
                return dest[channel];
            case FramebufferRegs::BlendFactor::OneMinusDestColor:
                return 255 - dest[channel];
            case FramebufferRegs::BlendFactor::SourceAlpha:
                return combiner_output.a();
            case FramebufferRegs::BlendFactor::OneMinusSourceAlpha:
                return 255 - combiner_output.a();
            case FramebufferRegs::BlendFactor::DestAlpha:
                return dest.a();
            case FramebufferRegs::BlendFactor::OneMinusDestAlpha:
                return 255 - dest.a();
            case FramebufferRegs::BlendFactor::ConstantColor:
                return blend_const[channel];
            case FramebufferRegs::BlendFactor::OneMinusConstantColor:
                return 255 - blend_const[channel];
            case FramebufferRegs::BlendFactor::ConstantAlpha:
                return blend_const.a();
            case FramebufferRegs::BlendFactor::OneMinusConstantAlpha:
                return 255 - blend_const.a();
            case FramebufferRegs::BlendFactor::SourceAlphaSaturate:
                // Returns 1.0 for the alpha channel
                if (channel == 3) {
                    return 255;
                }
                return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a()));
            default:
                LOG_CRITICAL(HW_GPU, "Unknown blend factor {:x}", factor);
                UNIMPLEMENTED();
                break;
            }
            // Unknown factor: fall back to the source color.
            return combiner_output[channel];
        };

        // RGB and alpha may use different source/dest factors.
        const auto srcfactor = Common::MakeVec(
            lookup_factor(0, params.factor_source_rgb), lookup_factor(1, params.factor_source_rgb),
            lookup_factor(2, params.factor_source_rgb), lookup_factor(3, params.factor_source_a));

        const auto dstfactor = Common::MakeVec(
            lookup_factor(0, params.factor_dest_rgb), lookup_factor(1, params.factor_dest_rgb),
            lookup_factor(2, params.factor_dest_rgb), lookup_factor(3, params.factor_dest_a));

        // RGB and alpha can also use different blend equations; evaluate both and
        // take the alpha channel from the second result.
        blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor,
                                             params.blend_equation_rgb);
        blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor,
                                                 params.blend_equation_a)
                               .a();
    } else {
        // Blending disabled: combine source and destination with the configured logic op.
        blend_output =
            Common::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
                            LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
                            LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op),
                            LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op));
    }

    // Per-channel write masks: disabled channels keep the existing framebuffer value.
    const Common::Vec4<u8> result = {
        output_merger.red_enable ? blend_output.r() : dest.r(),
        output_merger.green_enable ? blend_output.g() : dest.g(),
        output_merger.blue_enable ? blend_output.b() : dest.b(),
        output_merger.alpha_enable ? blend_output.a() : dest.a(),
    };

    return result;
}
|
|
|
|
|
2023-08-28 08:59:32 +02:00
|
|
|
/// Evaluates the PICA texture environment (TEV) combiner pipeline for one fragment.
/// @param texture_color             Sampled colors of texture units 0-3.
/// @param tev_stages                The six TEV stage configurations from the texturing registers.
/// @param primary_color             Interpolated vertex color.
/// @param primary_fragment_color    Primary (diffuse) lighting result.
/// @param secondary_fragment_color  Secondary (specular) lighting result.
/// @return The final combined fragment color after all six stages.
Common::Vec4<u8> RasterizerSoftware::WriteTevConfig(
    std::span<const Common::Vec4<u8>, 4> texture_color,
    std::span<const Pica::TexturingRegs::TevStageConfig, 6> tev_stages,
    Common::Vec4<u8> primary_color, Common::Vec4<u8> primary_fragment_color,
    Common::Vec4<u8> secondary_fragment_color) {
    /**
     * Texture environment - consists of 6 stages of color and alpha combining.
     * Color combiners take three input color values from some source (e.g. interpolated
     * vertex color, texture color, previous stage, etc), perform some very simple
     * operations on each of them (e.g. inversion) and then calculate the output color
     * with some basic arithmetic. Alpha combiners can be configured separately but work
     * analogously.
     **/
    // Output of the most recently evaluated stage; seeded with the vertex color so
    // Source::Previous reads something sensible at stage 0.
    Common::Vec4<u8> combiner_output = primary_color;
    // Buffer value visible to the *current* stage via Source::PreviousBuffer.
    Common::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
    // Buffer value being accumulated for *later* stages; initialized from the
    // tev_combiner_buffer_color register.
    Common::Vec4<u8> next_combiner_buffer =
        Common::MakeVec(regs.texturing.tev_combiner_buffer_color.r.Value(),
                        regs.texturing.tev_combiner_buffer_color.g.Value(),
                        regs.texturing.tev_combiner_buffer_color.b.Value(),
                        regs.texturing.tev_combiner_buffer_color.a.Value())
            .Cast<u8>();
    for (u32 tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
        const auto& tev_stage = tev_stages[tev_stage_index];
        using Source = TexturingRegs::TevStageConfig::Source;
        // Resolves a TEV input source selector to its concrete RGBA value.
        auto get_source = [&](Source source) -> Common::Vec4<u8> {
            switch (source) {
            case Source::PrimaryColor:
                return primary_color;
            case Source::PrimaryFragmentColor:
                return primary_fragment_color;
            case Source::SecondaryFragmentColor:
                return secondary_fragment_color;
            case Source::Texture0:
                return texture_color[0];
            case Source::Texture1:
                return texture_color[1];
            case Source::Texture2:
                return texture_color[2];
            case Source::Texture3:
                return texture_color[3];
            case Source::PreviousBuffer:
                return combiner_buffer;
            case Source::Constant:
                // Per-stage constant color configured in the stage registers.
                return Common::MakeVec(tev_stage.const_r.Value(), tev_stage.const_g.Value(),
                                       tev_stage.const_b.Value(), tev_stage.const_a.Value())
                    .Cast<u8>();
            case Source::Previous:
                return combiner_output;
            default:
                LOG_ERROR(HW_GPU, "Unknown color combiner source {}", (int)source);
                UNIMPLEMENTED();
                return {0, 0, 0, 0};
            }
        };
        /**
         * Color combiner
         * NOTE: Not sure if the alpha combiner might use the color output of the previous
         * stage as input. Hence, we currently don't directly write the result to
         * combiner_output.rgb(), but instead store it in a temporary variable until
         * alpha combining has been done.
         **/
        const std::array<Common::Vec3<u8>, 3> color_result = {
            GetColorModifier(tev_stage.color_modifier1, get_source(tev_stage.color_source1)),
            GetColorModifier(tev_stage.color_modifier2, get_source(tev_stage.color_source2)),
            GetColorModifier(tev_stage.color_modifier3, get_source(tev_stage.color_source3)),
        };
        const Common::Vec3<u8> color_output = ColorCombine(tev_stage.color_op, color_result);
        u8 alpha_output;
        if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
            // result of Dot3_RGBA operation is also placed to the alpha component
            alpha_output = color_output.x;
        } else {
            // alpha combiner
            const std::array<u8, 3> alpha_result = {{
                GetAlphaModifier(tev_stage.alpha_modifier1, get_source(tev_stage.alpha_source1)),
                GetAlphaModifier(tev_stage.alpha_modifier2, get_source(tev_stage.alpha_source2)),
                GetAlphaModifier(tev_stage.alpha_modifier3, get_source(tev_stage.alpha_source3)),
            }};
            alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
        }
        // Apply the per-stage scale factors, saturating each channel at 255.
        combiner_output[0] = std::min(255U, color_output.r() * tev_stage.GetColorMultiplier());
        combiner_output[1] = std::min(255U, color_output.g() * tev_stage.GetColorMultiplier());
        combiner_output[2] = std::min(255U, color_output.b() * tev_stage.GetColorMultiplier());
        combiner_output[3] = std::min(255U, alpha_output * tev_stage.GetAlphaMultiplier());
        // Commit the pending buffer *before* merging this stage's output, so a stage's
        // write becomes visible only to stages after the next one (one-stage delay).
        combiner_buffer = next_combiner_buffer;
        if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
                tev_stage_index)) {
            next_combiner_buffer.r() = combiner_output.r();
            next_combiner_buffer.g() = combiner_output.g();
            next_combiner_buffer.b() = combiner_output.b();
        }
        // Color and alpha buffer updates are gated independently per stage.
        if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
                tev_stage_index)) {
            next_combiner_buffer.a() = combiner_output.a();
        }
    }
    return combiner_output;
}
|
|
|
|
|
2023-08-28 10:09:23 +02:00
|
|
|
void RasterizerSoftware::WriteFog(float depth, Common::Vec4<u8>& combiner_output) const {
|
2023-06-24 00:59:18 +02:00
|
|
|
/**
|
|
|
|
* Apply fog combiner. Not fully accurate. We'd have to know what data type is used to
|
|
|
|
* store the depth etc. Using float for now until we know more about Pica datatypes.
|
|
|
|
**/
|
|
|
|
if (regs.texturing.fog_mode == TexturingRegs::FogMode::Fog) {
|
|
|
|
const Common::Vec3<u8> fog_color =
|
|
|
|
Common::MakeVec(regs.texturing.fog_color.r.Value(), regs.texturing.fog_color.g.Value(),
|
|
|
|
regs.texturing.fog_color.b.Value())
|
|
|
|
.Cast<u8>();
|
|
|
|
|
|
|
|
float fog_index;
|
|
|
|
if (regs.texturing.fog_flip) {
|
|
|
|
fog_index = (1.0f - depth) * 128.0f;
|
|
|
|
} else {
|
|
|
|
fog_index = depth * 128.0f;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Generate clamped fog factor from LUT for given fog index
|
|
|
|
const f32 fog_i = std::clamp(floorf(fog_index), 0.0f, 127.0f);
|
|
|
|
const f32 fog_f = fog_index - fog_i;
|
|
|
|
const auto& fog_lut_entry = state.fog.lut[static_cast<u32>(fog_i)];
|
|
|
|
f32 fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f;
|
|
|
|
fog_factor = std::clamp(fog_factor, 0.0f, 1.0f);
|
|
|
|
for (u32 i = 0; i < 3; i++) {
|
|
|
|
combiner_output[i] = static_cast<u8>(fog_factor * combiner_output[i] +
|
|
|
|
(1.0f - fog_factor) * fog_color[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Performs the configured alpha test against the fragment's alpha value.
/// @param alpha The fragment alpha produced by the TEV pipeline.
/// @return True when the fragment passes (or the test is disabled), false otherwise.
bool RasterizerSoftware::DoAlphaTest(u8 alpha) const {
    const auto& alpha_test = regs.framebuffer.output_merger.alpha_test;
    // A disabled test always passes.
    if (!alpha_test.enable) {
        return true;
    }
    using CompareFunc = FramebufferRegs::CompareFunc;
    switch (alpha_test.func) {
    case CompareFunc::Never:
        return false;
    case CompareFunc::Always:
        return true;
    case CompareFunc::Equal:
        return alpha == alpha_test.ref;
    case CompareFunc::NotEqual:
        return alpha != alpha_test.ref;
    case CompareFunc::LessThan:
        return alpha < alpha_test.ref;
    case CompareFunc::LessThanOrEqual:
        return alpha <= alpha_test.ref;
    case CompareFunc::GreaterThan:
        return alpha > alpha_test.ref;
    case CompareFunc::GreaterThanOrEqual:
        return alpha >= alpha_test.ref;
    default:
        LOG_CRITICAL(Render_Software, "Unknown alpha test condition {}",
                     alpha_test.func.Value());
        return false;
    }
}
|
|
|
|
|
2023-07-05 06:00:24 +02:00
|
|
|
/// Performs the stencil and depth tests for the fragment at (x, y), applying the
/// configured stencil actions and writing depth/stencil as permitted.
/// @param x,y   Fragment coordinates in the rasterizer's subpixel fixed-point format
///              (converted to pixel indices with >> 4 throughout).
/// @param depth Normalized fragment depth in [0, 1].
/// @return True when the fragment survives both tests and should be shaded/blended.
bool RasterizerSoftware::DoDepthStencilTest(u16 x, u16 y, float depth) const {
    const auto& framebuffer = regs.framebuffer.framebuffer;
    const auto stencil_test = regs.framebuffer.output_merger.stencil_test;
    u8 old_stencil = 0;
    // Applies a stencil action and writes the result back, honoring the write mask.
    // Captures old_stencil by reference: it is populated later, before any call.
    const auto update_stencil = [&](Pica::FramebufferRegs::StencilAction action) {
        const u8 new_stencil =
            PerformStencilAction(action, old_stencil, stencil_test.reference_value);
        if (framebuffer.allow_depth_stencil_write != 0) {
            // Only bits selected by write_mask are replaced; the rest keep their old value.
            const u8 stencil =
                (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask);
            fb.SetStencil(x >> 4, y >> 4, stencil);
        }
    };
    // Stencil operations require both the test to be enabled and a framebuffer
    // format that actually stores stencil bits (D24S8).
    const bool stencil_action_enable =
        regs.framebuffer.output_merger.stencil_test.enable &&
        regs.framebuffer.framebuffer.depth_format == FramebufferRegs::DepthFormat::D24S8;
    if (stencil_action_enable) {
        old_stencil = fb.GetStencil(x >> 4, y >> 4);
        // Both operands are masked by input_mask before comparison.
        const u8 dest = old_stencil & stencil_test.input_mask;
        const u8 ref = stencil_test.reference_value & stencil_test.input_mask;
        bool pass = false;
        switch (stencil_test.func) {
        case FramebufferRegs::CompareFunc::Never:
            pass = false;
            break;
        case FramebufferRegs::CompareFunc::Always:
            pass = true;
            break;
        case FramebufferRegs::CompareFunc::Equal:
            pass = (ref == dest);
            break;
        case FramebufferRegs::CompareFunc::NotEqual:
            pass = (ref != dest);
            break;
        case FramebufferRegs::CompareFunc::LessThan:
            pass = (ref < dest);
            break;
        case FramebufferRegs::CompareFunc::LessThanOrEqual:
            pass = (ref <= dest);
            break;
        case FramebufferRegs::CompareFunc::GreaterThan:
            pass = (ref > dest);
            break;
        case FramebufferRegs::CompareFunc::GreaterThanOrEqual:
            pass = (ref >= dest);
            break;
        }
        if (!pass) {
            // Stencil failure: apply the fail action and reject the fragment.
            update_stencil(stencil_test.action_stencil_fail);
            return false;
        }
    }
    // Quantize depth to the framebuffer's integer depth range (16 or 24 bits).
    const u32 num_bits = FramebufferRegs::DepthBitsPerPixel(framebuffer.depth_format);
    const u32 z = static_cast<u32>(depth * ((1 << num_bits) - 1));
    const auto& output_merger = regs.framebuffer.output_merger;
    if (output_merger.depth_test_enable) {
        const u32 ref_z = fb.GetDepth(x >> 4, y >> 4);
        bool pass = false;
        switch (output_merger.depth_test_func) {
        case FramebufferRegs::CompareFunc::Never:
            pass = false;
            break;
        case FramebufferRegs::CompareFunc::Always:
            pass = true;
            break;
        case FramebufferRegs::CompareFunc::Equal:
            pass = z == ref_z;
            break;
        case FramebufferRegs::CompareFunc::NotEqual:
            pass = z != ref_z;
            break;
        case FramebufferRegs::CompareFunc::LessThan:
            pass = z < ref_z;
            break;
        case FramebufferRegs::CompareFunc::LessThanOrEqual:
            pass = z <= ref_z;
            break;
        case FramebufferRegs::CompareFunc::GreaterThan:
            pass = z > ref_z;
            break;
        case FramebufferRegs::CompareFunc::GreaterThanOrEqual:
            pass = z >= ref_z;
            break;
        }
        if (!pass) {
            // Depth failure: apply the depth-fail stencil action (if any) and reject.
            if (stencil_action_enable) {
                update_stencil(stencil_test.action_depth_fail);
            }
            return false;
        }
    }
    // Fragment passed: write depth if both the framebuffer and output merger allow it.
    if (framebuffer.allow_depth_stencil_write != 0 && output_merger.depth_write_enable) {
        fb.SetDepth(x >> 4, y >> 4, z);
    }
    // The stencil depth_pass action is executed even if depth testing is disabled
    if (stencil_action_enable) {
        update_stencil(stencil_test.action_depth_pass);
    }
    return true;
}
|
|
|
|
|
2023-06-24 00:59:18 +02:00
|
|
|
} // namespace SwRenderer
|