vk_shader_gen_spv: Implement proctex sampler

* Fixes MHS menu and probably other games
This commit is contained in:
GPUCode
2022-11-21 21:23:23 +02:00
parent c8e9b465e2
commit 88f34a7d69
5 changed files with 384 additions and 28 deletions

View File

@@ -1077,7 +1077,7 @@ float ProcTexNoiseRand2D(vec2 point) {
v2 += 10 + u2; v2 += 10 + u2;
v2 &= 0xF; v2 &= 0xF;
v2 ^= table[u2]; v2 ^= table[u2];
return -1.0 + float(v2) * 2.0/ 15.0; return -1.0 + float(v2) * (2.0/15.0);
} }
float ProcTexNoiseCoef(vec2 x) { float ProcTexNoiseCoef(vec2 x) {

View File

@@ -115,11 +115,11 @@ struct PicaFSConfigState {
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift; BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift; BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
}; };
u8 lut_width; s32 lut_width;
u8 lut_offset0; s32 lut_offset0;
u8 lut_offset1; s32 lut_offset1;
u8 lut_offset2; s32 lut_offset2;
u8 lut_offset3; s32 lut_offset3;
u8 lod_min; u8 lod_min;
u8 lod_max; u8 lod_max;
} proctex; } proctex;

View File

@@ -20,6 +20,9 @@ FragmentModule::FragmentModule(const PicaFSConfig& config) : Sirit::Module{0x000
DefineArithmeticTypes(); DefineArithmeticTypes();
DefineUniformStructs(); DefineUniformStructs();
DefineInterface(); DefineInterface();
if (config.state.proctex.enable) {
DefineProcTexSampler();
}
DefineEntryPoint(); DefineEntryPoint();
} }
@@ -505,9 +508,15 @@ void FragmentModule::WriteTevStage(s32 index) {
} }
} }
using ProcTexClamp = TexturingRegs::ProcTexClamp;
using ProcTexShift = TexturingRegs::ProcTexShift;
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
using ProcTexFilter = TexturingRegs::ProcTexFilter;
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) { bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
using CompareFunc = FramebufferRegs::CompareFunc; using CompareFunc = FramebufferRegs::CompareFunc;
// The compare func is to keep the fragment so we invert it to discard it
const auto Compare = [this, func](Id alpha, Id alphatest_ref) { const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
switch (func) { switch (func) {
case CompareFunc::Equal: case CompareFunc::Equal:
@@ -625,13 +634,14 @@ Id FragmentModule::SampleTexture(u32 texture_unit) {
case 1: case 1:
return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id); return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
case 2: case 2:
if (state.texture2_use_coord1) if (state.texture2_use_coord1) {
return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id); return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id);
else } else {
return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id); return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id);
}
case 3: case 3:
if (false && state.proctex.enable) { if (state.proctex.enable) {
//return "ProcTex()"; return OpFunctionCall(vec_ids.Get(4), proctex_func);
} else { } else {
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it"); LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
return zero_vec; return zero_vec;
@@ -645,13 +655,12 @@ Id FragmentModule::SampleTexture(u32 texture_unit) {
Id FragmentModule::CompareShadow(Id pixel, Id z) { Id FragmentModule::CompareShadow(Id pixel, Id z) {
const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))}; const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))}; const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
const Id s8_mul{OpFMul(f32_id, pixel_s8, ConstF32(1.f / 255.f))}; const Id s8_f32{OpFMul(f32_id, pixel_s8, ConstF32(1.f / 255.f))};
const Id d24_leq_z{OpULessThanEqual(bool_id, pixel_d24, z)}; const Id d24_leq_z{OpULessThanEqual(bool_id, pixel_d24, z)};
return OpSelect(f32_id, d24_leq_z, ConstF32(0.f), s8_mul); return OpSelect(f32_id, d24_leq_z, ConstF32(0.f), s8_f32);
} }
Id FragmentModule::SampleShadow() { Id FragmentModule::SampleShadow() {
dump_shader = true;
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)}; const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w),
@@ -701,6 +710,200 @@ Id FragmentModule::SampleShadow() {
return OpCompositeConstruct(vec_ids.Get(4), val, val, val, val); return OpCompositeConstruct(vec_ids.Get(4), val, val, val, val);
} }
/// Emits the shift offset for a proctex coordinate.
/// @param v          Float coordinate whose integer part drives the shift
/// @param mode       Shift mode (None / Odd / Even)
/// @param clamp_mode Clamp mode of the shifted axis; selects the shift magnitude
/// @return Id of an f32 value: 0 for None/unknown modes, otherwise
///         magnitude * float(((int(v) [+ 1 for Even]) / 2) % 2)
Id FragmentModule::AppendProcTexShiftOffset(Id v, ProcTexShift mode, ProcTexClamp clamp_mode) {
    // Mirrored repeat shifts by 1.0, every other clamp mode by 0.5
    const bool is_mirrored = clamp_mode == ProcTexClamp::MirroredRepeat;
    const Id magnitude{ConstF32(is_mirrored ? 1.f : 0.5f)};
    const Id coord_i32{OpConvertFToS(i32_id, v)};

    // Emits magnitude * float(((coord [+ 1]) / 2) % 2)
    const auto EmitShift = [&](bool add_one) -> Id {
        Id numerator{coord_i32};
        if (add_one) {
            numerator = OpIAdd(i32_id, coord_i32, ConstS32(1));
        }
        const Id halved{OpSDiv(i32_id, numerator, ConstS32(2))};
        const Id parity{OpSMod(i32_id, halved, ConstS32(2))};
        const Id parity_f32{OpConvertSToF(f32_id, parity)};
        return OpFMul(f32_id, magnitude, parity_f32);
    };

    if (mode == ProcTexShift::None) {
        return ConstF32(0.f);
    }
    if (mode == ProcTexShift::Odd) {
        return EmitShift(false);
    }
    if (mode == ProcTexShift::Even) {
        return EmitShift(true);
    }

    LOG_CRITICAL(Render_Vulkan, "Unknown shift mode {}", mode);
    return ConstF32(0.f);
}
/// Emits the clamp operation applied to a proctex coordinate.
/// @param var  Id of the f32 coordinate to clamp
/// @param mode Clamp mode to apply
/// @return Id of the clamped f32 coordinate. Unknown modes are logged and treated as ToEdge.
Id FragmentModule::AppendProcTexClamp(Id var, ProcTexClamp mode) {
    const Id lower{ConstF32(0.f)};
    const Id upper{ConstF32(1.f)};

    if (mode == ProcTexClamp::ToZero) {
        // var > 1.0 ? 0.0 : var
        const Id above_one{OpFOrdGreaterThan(bool_id, var, upper)};
        return OpSelect(f32_id, above_one, lower, var);
    }
    if (mode == ProcTexClamp::SymmetricalRepeat) {
        return OpFract(f32_id, var);
    }
    if (mode == ProcTexClamp::MirroredRepeat) {
        // Even integer part keeps the fraction, odd integer part mirrors it
        const Id fraction{OpFract(f32_id, var)};
        const Id whole{OpConvertFToS(i32_id, var)};
        const Id parity{OpSMod(i32_id, whole, ConstS32(2))};
        const Id is_even{OpIEqual(bool_id, parity, ConstS32(0))};
        const Id mirrored{OpFSub(f32_id, upper, fraction)};
        return OpSelect(f32_id, is_even, fraction, mirrored);
    }
    if (mode == ProcTexClamp::Pulse) {
        // var > 0.5 ? 1.0 : 0.0
        const Id above_half{OpFOrdGreaterThan(bool_id, var, ConstF32(0.5f))};
        return OpSelect(f32_id, above_half, upper, lower);
    }

    if (mode != ProcTexClamp::ToEdge) {
        LOG_CRITICAL(Render_Vulkan, "Unknown clamp mode {}", mode);
    }
    // ToEdge (and the fallback for unknown modes): min(var, 1.0)
    return OpFMin(f32_id, var, upper);
}
/// Combines the u and v coordinates with the requested combiner and maps the result
/// through ProcTexLookupLUT.
/// @param combiner Combiner function to apply to (u, v)
/// @param u        Id of the f32 u coordinate
/// @param v        Id of the f32 v coordinate
/// @param offset   Id of the LUT offset forwarded to ProcTexLookupLUT
/// @return Id returned by ProcTexLookupLUT for the combined coordinate
Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id v, Id offset) {
    // u * u + v * v is emitted unconditionally; several combiners consume it
    const Id v_squared{OpFMul(f32_id, v, v)};
    const Id squared_sum{OpFma(f32_id, u, u, v_squared)};

    Id combined{};
    switch (combiner) {
    case ProcTexCombiner::U:
        combined = u;
        break;
    case ProcTexCombiner::U2:
        combined = OpFMul(f32_id, u, u);
        break;
    case ProcTexCombiner::V:
        combined = v;
        break;
    case ProcTexCombiner::V2:
        combined = OpFMul(f32_id, v, v);
        break;
    case ProcTexCombiner::Add: {
        const Id sum{OpFAdd(f32_id, u, v)};
        combined = OpFMul(f32_id, sum, ConstF32(0.5f));
        break;
    }
    case ProcTexCombiner::Add2:
        combined = OpFMul(f32_id, squared_sum, ConstF32(0.5f));
        break;
    case ProcTexCombiner::SqrtAdd2: {
        const Id length{OpSqrt(f32_id, squared_sum)};
        combined = OpFMin(f32_id, length, ConstF32(1.f));
        break;
    }
    case ProcTexCombiner::Min:
        combined = OpFMin(f32_id, u, v);
        break;
    case ProcTexCombiner::Max:
        combined = OpFMax(f32_id, u, v);
        break;
    case ProcTexCombiner::RMax: {
        // min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)
        const Id sum{OpFAdd(f32_id, u, v)};
        const Id length{OpSqrt(f32_id, squared_sum)};
        const Id r{OpFma(f32_id, sum, ConstF32(0.5f), length)};
        const Id half_r{OpFMul(f32_id, r, ConstF32(0.5f))};
        combined = OpFMin(f32_id, half_r, ConstF32(1.f));
        break;
    }
    default:
        LOG_CRITICAL(Render_Vulkan, "Unknown combiner {}", combiner);
        combined = ConstF32(0.f);
        break;
    }

    return ProcTexLookupLUT(offset, combined);
}
void FragmentModule::DefineProcTexSampler() {
const Id func_type{TypeFunction(vec_ids.Get(4))};
proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
AddLabel(OpLabel());
// Define noise tables at the beginning of the function
if (config.state.proctex.noise_enable) {
noise1d_table = DefineVar<false>(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function);
noise2d_table = DefineVar<false>(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function);
}
lut_offsets = DefineVar<false>(TypeArray(i32_id, ConstU32(8u)), spv::StorageClass::Function);
Id uv{};
if (config.state.proctex.coord < 3) {
Id texcoord_id{};
switch (config.state.proctex.coord.Value()) {
case 0: texcoord_id = texcoord0_id; break;
case 1: texcoord_id = texcoord1_id; break;
case 2: texcoord_id = texcoord2_id; break;
}
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
uv = OpFAbs(vec_ids.Get(2), texcoord);
} else {
LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3");
uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id));
}
// This LOD formula is the same as the LOD upper limit defined in OpenGL.
// f(x, y) <= m_u + m_v + m_w
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
// Note: this is different from the one normal 2D textures use.
const Id uv_1{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), uv))};
const Id uv_2{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), uv))};
const Id duv{OpFMax(vec_ids.Get(2), uv_1, uv_2)};
// unlike normal texture, the bias is inside the log2
const Id proctex_bias{GetShaderDataMember(f32_id, ConstS32(16))};
const Id bias{OpFMul(f32_id, ConstF32(static_cast<f32>(config.state.proctex.lut_width)), proctex_bias)};
const Id duv_xy{OpFAdd(f32_id, OpCompositeExtract(f32_id, duv, 0), OpCompositeExtract(f32_id, duv, 1))};
Id lod{OpLog2(f32_id, OpFMul(f32_id, OpFAbs(f32_id, bias), duv_xy))};
lod = OpSelect(f32_id, OpFOrdEqual(bool_id, proctex_bias, ConstF32(0.f)), ConstF32(0.f), lod);
lod = OpFClamp(f32_id, lod, ConstF32(std::max(0.0f, static_cast<float>(config.state.proctex.lod_min))),
ConstF32(std::min(7.0f, static_cast<float>(config.state.proctex.lod_max))));
// Get shift offset before noise generation
const Id u_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 1),
config.state.proctex.u_shift,
config.state.proctex.u_clamp)};
const Id v_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 0),
config.state.proctex.v_shift,
config.state.proctex.v_clamp)};
// Generate noise
if (config.state.proctex.noise_enable) {
const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(22))};
const Id noise_coef{ProcTexNoiseCoef(uv)};
uv = OpFAdd(vec_ids.Get(2), uv, OpVectorTimesScalar(vec_ids.Get(2), proctex_noise_a, noise_coef));
uv = OpFAbs(vec_ids.Get(2), uv);
}
// Shift
Id u{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 0), u_shift)};
Id v{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 1), v_shift)};
// Clamp
u = AppendProcTexClamp(u, config.state.proctex.u_clamp);
v = AppendProcTexClamp(v, config.state.proctex.v_clamp);
// Combine and map
const Id proctex_color_map_offset{GetShaderDataMember(i32_id, ConstS32(12))};
const Id lut_coord{AppendProcTexCombineAndMap(config.state.proctex.color_combiner,
u, v, proctex_color_map_offset)};
Id final_color{};
switch (config.state.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::Nearest: {
final_color = SampleProcTexColor(lut_coord, ConstS32(0));
break;
}
case ProcTexFilter::NearestMipmapNearest:
case ProcTexFilter::LinearMipmapNearest: {
final_color = SampleProcTexColor(lut_coord, OpConvertFToS(i32_id, OpRound(f32_id, lod)));
break;
}
case ProcTexFilter::NearestMipmapLinear:
case ProcTexFilter::LinearMipmapLinear: {
const Id lod_i{OpConvertFToS(i32_id, lod)};
const Id lod_f{OpFract(f32_id, lod)};
const Id color1{SampleProcTexColor(lut_coord, lod_i)};
const Id color2{SampleProcTexColor(lut_coord, OpIAdd(i32_id, lod_i, ConstS32(1)))};
final_color = OpFMix(f32_id, color1, color2, lod_f);
break;
}
}
if (config.state.proctex.separate_alpha) {
const Id proctex_alpha_map_offset{GetShaderDataMember(i32_id, ConstS32(13))};
const Id final_alpha{AppendProcTexCombineAndMap(config.state.proctex.alpha_combiner, u, v,
proctex_alpha_map_offset)};
final_color = OpCompositeConstruct(vec_ids.Get(4), final_color, final_alpha);
}
OpReturnValue(final_color);
OpFunctionEnd();
}
Id FragmentModule::Byteround(Id variable_id, u32 size) { Id FragmentModule::Byteround(Id variable_id, u32 size) {
if (size > 1) { if (size > 1) {
const Id scaled_vec_id{OpVectorTimesScalar(vec_ids.Get(size), variable_id, ConstF32(255.f))}; const Id scaled_vec_id{OpVectorTimesScalar(vec_ids.Get(size), variable_id, ConstF32(255.f))};
@@ -712,6 +915,121 @@ Id FragmentModule::Byteround(Id variable_id, u32 size) {
} }
} }
/// Emits an interpolated lookup into the two-channel proctex LUT buffer.
/// @param offset Id of the i32 offset added to the entry index
/// @param coord  Id of the f32 lookup coordinate
/// @return Id of clamp(entry.r + entry.g * fraction, 0, 1), where the entry index is
///         clamp(floor(coord * 128), 0, 127) and fraction is coord * 128 minus that index
Id FragmentModule::ProcTexLookupLUT(Id offset, Id coord) {
    const Id scaled{OpFMul(f32_id, coord, ConstF32(128.f))};
    const Id floored{OpFloor(f32_id, scaled)};
    const Id entry_index{OpFClamp(f32_id, floored, ConstF32(0.f), ConstF32(127.0f))};
    const Id fraction{OpFSub(f32_id, scaled, entry_index)};
    const Id texel_pos{OpIAdd(i32_id, OpConvertFToS(i32_id, entry_index), offset)};

    // Only load the texture buffer lut once
    if (!Sirit::ValidId(texture_buffer_lut_rg)) {
        texture_buffer_lut_rg =
            OpLoad(TypeSampledImage(image_buffer_id), texture_buffer_lut_rg_id);
    }

    const Id lut_image{OpImage(image_buffer_id, texture_buffer_lut_rg)};
    const Id texel{OpImageFetch(vec_ids.Get(4), lut_image, texel_pos)};
    const Id value{OpCompositeExtract(f32_id, texel, 0)};
    const Id difference{OpCompositeExtract(f32_id, texel, 1)};
    const Id interpolated{OpFma(f32_id, difference, fraction, value)};
    return OpFClamp(f32_id, interpolated, ConstF32(0.f), ConstF32(1.f));
}
/// Emits the proctex noise coefficient computation for the coordinate pair x.
/// @param x Id of the vec2 coordinate the noise is evaluated at
/// @return Id of the f32 noise coefficient: four pseudo-random corner values of the grid
///         cell containing x, blended with weights fetched through ProcTexLookupLUT
Id FragmentModule::ProcTexNoiseCoef(Id x) {
    // Fill both lookup tables once, before any of the loads emitted below. The random
    // helpers are invoked several times and must not re-emit the same stores on every call.
    InitTableS32(noise1d_table, 0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11);
    InitTableS32(noise2d_table, 10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14);
    const Id table_ptr{TypePointer(spv::StorageClass::Function, i32_id)};

    // Noise utility
    // ((v % 9 + 2) * 3 & 0xF) ^ table1d[(v / 9) & 0xF]
    const auto ProcTexNoiseRand1D = [&](Id v) -> Id {
        const Id left_tmp{OpIAdd(i32_id, OpSMod(i32_id, v, ConstS32(9)), ConstS32(2))};
        const Id left{
            OpBitwiseAnd(i32_id, OpIMul(i32_id, left_tmp, ConstS32(3)), ConstS32(0xF))};
        const Id table_index{
            OpBitwiseAnd(i32_id, OpSDiv(i32_id, v, ConstS32(9)), ConstS32(0xF))};
        const Id table_value{
            OpLoad(i32_id, OpAccessChain(table_ptr, noise1d_table, table_index))};
        return OpBitwiseXor(i32_id, left, table_value);
    };

    // Mixes the 1D values of both components and maps the 4-bit result to [-1, 1]
    const auto ProcTexNoiseRand2D = [&](Id cell) -> Id {
        const Id cell_x{OpConvertFToS(i32_id, OpCompositeExtract(f32_id, cell, 0))};
        const Id cell_y{OpConvertFToS(i32_id, OpCompositeExtract(f32_id, cell, 1))};

        const Id u2{ProcTexNoiseRand1D(cell_x)};
        const Id cond{OpIEqual(bool_id, OpBitwiseAnd(i32_id, u2, ConstS32(3)), ConstS32(1))};
        const Id table_value{OpLoad(i32_id, OpAccessChain(table_ptr, noise2d_table, u2))};

        Id v2{ProcTexNoiseRand1D(cell_y)};
        v2 = OpIAdd(i32_id, v2, OpSelect(i32_id, cond, ConstS32(4), ConstS32(0)));
        v2 = OpBitwiseXor(
            i32_id, v2, OpIMul(i32_id, OpBitwiseAnd(i32_id, u2, ConstS32(1)), ConstS32(6)));
        v2 = OpIAdd(i32_id, v2, OpIAdd(i32_id, u2, ConstS32(10)));
        v2 = OpBitwiseAnd(i32_id, v2, ConstS32(0xF));
        v2 = OpBitwiseXor(i32_id, v2, table_value);
        // -1.0 + float(v2) * (2.0 / 15.0)
        return OpFma(f32_id, OpConvertSToF(f32_id, v2), ConstF32(2.f / 15.f), ConstF32(-1.f));
    };

    // grid = 9 * noise_f * abs(x + noise_p), split into integer cell and fractional part
    const Id proctex_noise_f{GetShaderDataMember(vec_ids.Get(2), ConstS32(21))};
    const Id proctex_noise_p{GetShaderDataMember(vec_ids.Get(2), ConstS32(23))};
    const Id grid{
        OpFMul(vec_ids.Get(2),
               OpVectorTimesScalar(vec_ids.Get(2), proctex_noise_f, ConstF32(9.f)),
               OpFAbs(vec_ids.Get(2), OpFAdd(vec_ids.Get(2), x, proctex_noise_p)))};
    const Id point{OpFloor(vec_ids.Get(2), grid)};
    const Id frac{OpFSub(vec_ids.Get(2), grid, point)};
    const Id frac_x{OpCompositeExtract(f32_id, frac, 0)};
    const Id frac_y{OpCompositeExtract(f32_id, frac, 1)};

    // Pseudo-random values at the four corners of the cell, weighted by the fraction sum
    const Id frac_x_y{OpFAdd(f32_id, frac_x, frac_y)};
    const Id g0{OpFMul(f32_id, ProcTexNoiseRand2D(point), frac_x_y)};
    const Id frac_x_y_min_one{OpFSub(f32_id, frac_x_y, ConstF32(1.f))};
    const Id g1{OpFMul(
        f32_id, ProcTexNoiseRand2D(OpFAdd(vec_ids.Get(2), point, ConstF32(1.f, 0.f))),
        frac_x_y_min_one)};
    const Id g2{OpFMul(
        f32_id, ProcTexNoiseRand2D(OpFAdd(vec_ids.Get(2), point, ConstF32(0.f, 1.f))),
        frac_x_y_min_one)};
    const Id frac_x_y_min_two{OpFSub(f32_id, frac_x_y, ConstF32(2.f))};
    const Id g3{OpFMul(
        f32_id, ProcTexNoiseRand2D(OpFAdd(vec_ids.Get(2), point, ConstF32(1.f, 1.f))),
        frac_x_y_min_two)};

    // Blend the corners with weights fetched from the noise LUT
    const Id proctex_noise_lut_offset{GetShaderDataMember(i32_id, ConstS32(11))};
    const Id x_noise{ProcTexLookupLUT(proctex_noise_lut_offset, frac_x)};
    const Id y_noise{ProcTexLookupLUT(proctex_noise_lut_offset, frac_y)};
    const Id x0{OpFMix(f32_id, g0, g1, x_noise)};
    const Id x1{OpFMix(f32_id, g2, g3, x_noise)};
    return OpFMix(f32_id, x0, x1, y_noise);
}
/// Emits a fetch from the RGBA proctex colour LUT for the given mip level.
/// @param lut_coord Id of the f32 LUT coordinate
/// @param level     Id of the i32 mip level used to select the LUT width and offset
/// @return Id of the fetched vec4 colour, or a default-constructed Id when the configured
///         filter matches none of the handled cases
Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) {
    const Id base_width{ConstS32(config.state.proctex.lut_width)};
    const Id level_width{OpShiftRightArithmetic(i32_id, base_width, level)};
    const Id offset_ptr_type{TypePointer(spv::StorageClass::Function, i32_id)};

    // Offsets for level 4-7 seem to be hardcoded
    InitTableS32(lut_offsets, config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
                 config.state.proctex.lut_offset2, config.state.proctex.lut_offset3, 0xF0, 0xF8,
                 0xFC, 0xFE);
    const Id level_offset_ptr{OpAccessChain(offset_ptr_type, lut_offsets, level)};
    const Id level_offset{OpLoad(i32_id, level_offset_ptr)};

    // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
    const Id last_index{OpISub(i32_id, level_width, ConstS32(1))};
    const Id scaled_coord{OpFMul(f32_id, lut_coord, OpConvertSToF(f32_id, last_index))};

    // Only load the texture buffer lut once
    if (!Sirit::ValidId(texture_buffer_lut_rgba)) {
        texture_buffer_lut_rgba =
            OpLoad(TypeSampledImage(image_buffer_id), texture_buffer_lut_rgba_id);
    }

    const Id proctex_lut_offset{GetShaderDataMember(i32_id, ConstS32(14))};
    const Id lut_image{OpImage(image_buffer_id, texture_buffer_lut_rgba)};

    switch (config.state.proctex.lut_filter) {
    case ProcTexFilter::Linear:
    case ProcTexFilter::LinearMipmapLinear:
    case ProcTexFilter::LinearMipmapNearest: {
        // value texel + fraction * difference texel
        const Id whole{OpConvertFToS(i32_id, scaled_coord)};
        const Id entry_index{OpIAdd(i32_id, whole, level_offset)};
        const Id fraction{OpFract(f32_id, scaled_coord)};
        const Id proctex_diff_lut_offset{GetShaderDataMember(i32_id, ConstS32(15))};
        const Id value_pos{OpIAdd(i32_id, entry_index, proctex_lut_offset)};
        const Id diff_pos{OpIAdd(i32_id, entry_index, proctex_diff_lut_offset)};
        const Id value_texel{OpImageFetch(vec_ids.Get(4), lut_image, value_pos)};
        const Id diff_texel{OpImageFetch(vec_ids.Get(4), lut_image, diff_pos)};
        const Id weighted_diff{OpVectorTimesScalar(vec_ids.Get(4), diff_texel, fraction)};
        return OpFAdd(vec_ids.Get(4), value_texel, weighted_diff);
    }
    case ProcTexFilter::Nearest:
    case ProcTexFilter::NearestMipmapLinear:
    case ProcTexFilter::NearestMipmapNearest: {
        // Round to the closest entry of the selected level
        const Id level_offset_f32{OpConvertSToF(f32_id, level_offset)};
        const Id shifted_coord{OpFAdd(f32_id, scaled_coord, level_offset_f32)};
        const Id nearest{OpConvertFToS(i32_id, OpRound(f32_id, shifted_coord))};
        const Id texel_pos{OpIAdd(i32_id, nearest, proctex_lut_offset)};
        return OpImageFetch(vec_ids.Get(4), lut_image, texel_pos);
    }
    }

    return Id{};
}
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) { Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
// Only load the texture buffer lut once // Only load the texture buffer lut once
if (!Sirit::ValidId(texture_buffer_lut_lf)) { if (!Sirit::ValidId(texture_buffer_lut_lf)) {
@@ -1022,8 +1340,6 @@ void FragmentModule::DefineInterface() {
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
} }
static int i = 0;
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) { std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
FragmentModule module{config}; FragmentModule module{config};
module.Generate(); module.Generate();

View File

@@ -41,6 +41,9 @@ public:
/// Writes the code to emulate the specified TEV stage /// Writes the code to emulate the specified TEV stage
void WriteTevStage(s32 index); void WriteTevStage(s32 index);
/// Defines the tex3 proctex sampling function
void DefineProcTexSampler();
/// Writes the if-statement condition used to evaluate alpha testing. /// Writes the if-statement condition used to evaluate alpha testing.
/// Returns true if the fragment was discarded /// Returns true if the fragment was discarded
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func); [[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
@@ -51,9 +54,29 @@ public:
/// Samples the current fragment texel from shadow plane /// Samples the current fragment texel from shadow plane
[[nodiscard]] Id SampleShadow(); [[nodiscard]] Id SampleShadow();
[[nodiscard]] Id AppendProcTexShiftOffset(Id v, Pica::TexturingRegs::ProcTexShift mode,
Pica::TexturingRegs::ProcTexClamp clamp_mode);
[[nodiscard]] Id AppendProcTexClamp(Id var, Pica::TexturingRegs::ProcTexClamp mode);
[[nodiscard]] Id AppendProcTexCombineAndMap(Pica::TexturingRegs::ProcTexCombiner combiner,
Id u, Id v, Id offset);
/// Rounds the provided variable to the nearest 1/255th /// Rounds the provided variable to the nearest 1/255th
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1); [[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
/// LUT sampling utility
/// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
/// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
/// value entries and difference entries.
[[nodiscard]] Id ProcTexLookupLUT(Id offset, Id coord);
/// Generates random noise with proctex
[[nodiscard]] Id ProcTexNoiseCoef(Id x);
/// Samples a color value from the rgba texture lut
[[nodiscard]] Id SampleProcTexColor(Id lut_coord, Id level);
/// Lookups the lighting LUT at the provided lut_index /// Lookups the lighting LUT at the provided lut_index
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta); [[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
@@ -73,9 +96,15 @@ public:
/// Writes the combiner function for the alpha component for the specified TEV stage operation /// Writes the combiner function for the alpha component for the specified TEV stage operation
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation); [[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
bool dump_shader{false};
private: private:
/// Builds a signed integer constant composite from elems and stores it into table
template <typename... T>
void InitTableS32(Id table, T... elems) {
    OpStore(table, ConstS32(elems...));
}
/// Loads the member specified from the shader_data uniform struct /// Loads the member specified from the shader_data uniform struct
template <typename... Ids> template <typename... Ids>
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) { [[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
@@ -114,9 +143,11 @@ private:
return uniform_id; return uniform_id;
} }
template <bool global = true>
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) { [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
const Id pointer_type_id{TypePointer(storage_class, type)}; const Id pointer_type_id{TypePointer(storage_class, type)};
return AddGlobalVariable(pointer_type_id, storage_class); return global ? AddGlobalVariable(pointer_type_id, storage_class)
: AddLocalVariable(pointer_type_id, storage_class);
} }
/// Returns the id of a signed integer constant of value /// Returns the id of a signed integer constant of value
@@ -126,10 +157,11 @@ private:
template <typename... Args> template <typename... Args>
[[nodiscard]] Id ConstU32(Args&&... values) { [[nodiscard]] Id ConstU32(Args&&... values) {
constexpr auto size = sizeof...(values); constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2 && size <= 4); static_assert(size >= 2);
const std::array constituents{Constant(u32_id, values)...}; const std::array constituents{Constant(u32_id, values)...};
return ConstantComposite(uvec_ids.Get(size), constituents); const Id type = size <= 4 ? uvec_ids.Get(size) : TypeArray(u32_id, ConstU32(size));
return ConstantComposite(type, constituents);
} }
/// Returns the id of a signed integer constant of value /// Returns the id of a signed integer constant of value
@@ -139,10 +171,11 @@ private:
template <typename... Args> template <typename... Args>
[[nodiscard]] Id ConstS32(Args&&... values) { [[nodiscard]] Id ConstS32(Args&&... values) {
constexpr auto size = sizeof...(values); constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2 && size <= 4); static_assert(size >= 2);
const std::array constituents{Constant(i32_id, values)...}; const std::array constituents{Constant(i32_id, values)...};
return ConstantComposite(ivec_ids.Get(size), constituents); const Id type = size <= 4 ? ivec_ids.Get(size) : TypeArray(i32_id, ConstU32(size));
return ConstantComposite(type, constituents);
} }
/// Returns the id of a float constant of value /// Returns the id of a float constant of value
@@ -152,10 +185,11 @@ private:
template <typename... Args> template <typename... Args>
[[nodiscard]] Id ConstF32(Args... values) { [[nodiscard]] Id ConstF32(Args... values) {
constexpr auto size = sizeof...(values); constexpr u32 size = static_cast<u32>(sizeof...(values));
static_assert(size >= 2 && size <= 4); static_assert(size >= 2);
const std::array constituents{Constant(f32_id, values)...}; const std::array constituents{Constant(f32_id, values)...};
return ConstantComposite(vec_ids.Get(size), constituents); const Id type = size <= 4 ? vec_ids.Get(size) : TypeArray(f32_id, ConstU32(size));
return ConstantComposite(type, constituents);
} }
void DefineArithmeticTypes(); void DefineArithmeticTypes();
@@ -215,6 +249,8 @@ private:
Id shadow_texture_nz_id{}; Id shadow_texture_nz_id{};
Id texture_buffer_lut_lf{}; Id texture_buffer_lut_lf{};
Id texture_buffer_lut_rg{};
Id texture_buffer_lut_rgba{};
Id rounded_primary_color{}; Id rounded_primary_color{};
Id primary_fragment_color{}; Id primary_fragment_color{};
@@ -229,6 +265,11 @@ private:
Id alpha_results_1{}; Id alpha_results_1{};
Id alpha_results_2{}; Id alpha_results_2{};
Id alpha_results_3{}; Id alpha_results_3{};
Id proctex_func{};
Id noise1d_table{};
Id noise2d_table{};
Id lut_offsets{};
}; };
/** /**

View File

@@ -179,8 +179,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
includer)) { includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
shader->getInfoDebugLog()); shader->getInfoDebugLog());
LOG_CRITICAL(Render_Vulkan, "{}", code); fmt::print("{}", code);
ASSERT(false);
return VK_NULL_HANDLE; return VK_NULL_HANDLE;
} }