vk_shader_gen_spv: Implement proctex sampler
* Fixes MHS menu and probably other games
This commit is contained in:
@@ -1077,7 +1077,7 @@ float ProcTexNoiseRand2D(vec2 point) {
|
|||||||
v2 += 10 + u2;
|
v2 += 10 + u2;
|
||||||
v2 &= 0xF;
|
v2 &= 0xF;
|
||||||
v2 ^= table[u2];
|
v2 ^= table[u2];
|
||||||
return -1.0 + float(v2) * 2.0/ 15.0;
|
return -1.0 + float(v2) * (2.0/15.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
float ProcTexNoiseCoef(vec2 x) {
|
float ProcTexNoiseCoef(vec2 x) {
|
||||||
|
@@ -115,11 +115,11 @@ struct PicaFSConfigState {
|
|||||||
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
|
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
|
||||||
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
|
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
|
||||||
};
|
};
|
||||||
u8 lut_width;
|
s32 lut_width;
|
||||||
u8 lut_offset0;
|
s32 lut_offset0;
|
||||||
u8 lut_offset1;
|
s32 lut_offset1;
|
||||||
u8 lut_offset2;
|
s32 lut_offset2;
|
||||||
u8 lut_offset3;
|
s32 lut_offset3;
|
||||||
u8 lod_min;
|
u8 lod_min;
|
||||||
u8 lod_max;
|
u8 lod_max;
|
||||||
} proctex;
|
} proctex;
|
||||||
|
@@ -20,6 +20,9 @@ FragmentModule::FragmentModule(const PicaFSConfig& config) : Sirit::Module{0x000
|
|||||||
DefineArithmeticTypes();
|
DefineArithmeticTypes();
|
||||||
DefineUniformStructs();
|
DefineUniformStructs();
|
||||||
DefineInterface();
|
DefineInterface();
|
||||||
|
if (config.state.proctex.enable) {
|
||||||
|
DefineProcTexSampler();
|
||||||
|
}
|
||||||
DefineEntryPoint();
|
DefineEntryPoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -505,9 +508,15 @@ void FragmentModule::WriteTevStage(s32 index) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using ProcTexClamp = TexturingRegs::ProcTexClamp;
|
||||||
|
using ProcTexShift = TexturingRegs::ProcTexShift;
|
||||||
|
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
|
||||||
|
using ProcTexFilter = TexturingRegs::ProcTexFilter;
|
||||||
|
|
||||||
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
|
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
|
||||||
using CompareFunc = FramebufferRegs::CompareFunc;
|
using CompareFunc = FramebufferRegs::CompareFunc;
|
||||||
|
|
||||||
|
// The compare func is to keep the fragment so we invert it to discard it
|
||||||
const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
|
const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
|
||||||
switch (func) {
|
switch (func) {
|
||||||
case CompareFunc::Equal:
|
case CompareFunc::Equal:
|
||||||
@@ -625,13 +634,14 @@ Id FragmentModule::SampleTexture(u32 texture_unit) {
|
|||||||
case 1:
|
case 1:
|
||||||
return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
|
return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
|
||||||
case 2:
|
case 2:
|
||||||
if (state.texture2_use_coord1)
|
if (state.texture2_use_coord1) {
|
||||||
return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id);
|
return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id);
|
||||||
else
|
} else {
|
||||||
return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id);
|
return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id);
|
||||||
|
}
|
||||||
case 3:
|
case 3:
|
||||||
if (false && state.proctex.enable) {
|
if (state.proctex.enable) {
|
||||||
//return "ProcTex()";
|
return OpFunctionCall(vec_ids.Get(4), proctex_func);
|
||||||
} else {
|
} else {
|
||||||
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
|
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
|
||||||
return zero_vec;
|
return zero_vec;
|
||||||
@@ -645,13 +655,12 @@ Id FragmentModule::SampleTexture(u32 texture_unit) {
|
|||||||
Id FragmentModule::CompareShadow(Id pixel, Id z) {
|
Id FragmentModule::CompareShadow(Id pixel, Id z) {
|
||||||
const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
|
const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
|
||||||
const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
|
const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
|
||||||
const Id s8_mul{OpFMul(f32_id, pixel_s8, ConstF32(1.f / 255.f))};
|
const Id s8_f32{OpFMul(f32_id, pixel_s8, ConstF32(1.f / 255.f))};
|
||||||
const Id d24_leq_z{OpULessThanEqual(bool_id, pixel_d24, z)};
|
const Id d24_leq_z{OpULessThanEqual(bool_id, pixel_d24, z)};
|
||||||
return OpSelect(f32_id, d24_leq_z, ConstF32(0.f), s8_mul);
|
return OpSelect(f32_id, d24_leq_z, ConstF32(0.f), s8_f32);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id FragmentModule::SampleShadow() {
|
Id FragmentModule::SampleShadow() {
|
||||||
dump_shader = true;
|
|
||||||
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
|
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
|
||||||
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
|
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
|
||||||
const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w),
|
const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w),
|
||||||
@@ -701,6 +710,200 @@ Id FragmentModule::SampleShadow() {
|
|||||||
return OpCompositeConstruct(vec_ids.Get(4), val, val, val, val);
|
return OpCompositeConstruct(vec_ids.Get(4), val, val, val, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits the code that computes the shift added to one proctex coordinate.
/// @param v          Float coordinate whose integer part selects whether the shift applies
/// @param mode       Shift mode (None / Odd / Even)
/// @param clamp_mode Clamp mode of the shifted axis; MirroredRepeat shifts by 1.0, others by 0.5
/// @return Id of a float that is either 0 or the shift amount
Id FragmentModule::AppendProcTexShiftOffset(Id v, ProcTexShift mode, ProcTexClamp clamp_mode) {
    const bool is_mirrored = clamp_mode == ProcTexClamp::MirroredRepeat;
    const Id shift_amount{is_mirrored ? ConstF32(1.f) : ConstF32(0.5f)};
    const Id v_int{OpConvertFToS(i32_id, v)};

    // shift_amount * float(((int(v) + (even ? 1 : 0)) / 2) % 2)
    const auto emit_shift = [&](bool even) -> Id {
        Id base{v_int};
        if (even) {
            base = OpIAdd(i32_id, v_int, ConstS32(1));
        }
        const Id halved{OpSDiv(i32_id, base, ConstS32(2))};
        const Id parity{OpSMod(i32_id, halved, ConstS32(2))};
        const Id parity_f{OpConvertSToF(f32_id, parity)};
        return OpFMul(f32_id, shift_amount, parity_f);
    };

    if (mode == ProcTexShift::None) {
        return ConstF32(0.f);
    }
    if (mode == ProcTexShift::Odd) {
        return emit_shift(false);
    }
    if (mode == ProcTexShift::Even) {
        return emit_shift(true);
    }

    // Unrecognised register value: report it and apply no shift
    LOG_CRITICAL(Render_Vulkan, "Unknown shift mode {}", mode);
    return ConstF32(0.f);
}
|
||||||
|
|
||||||
|
/// Emits the code that applies the given proctex clamp mode to a float coordinate.
/// @param var  Id of the float coordinate to clamp
/// @param mode Clamp mode to apply
/// @return Id of the clamped float coordinate
Id FragmentModule::AppendProcTexClamp(Id var, ProcTexClamp mode) {
    const Id zero{ConstF32(0.f)};
    const Id one{ConstF32(1.f)};

    switch (mode) {
    case ProcTexClamp::ToZero: {
        // Anything above 1.0 becomes 0.0
        const Id above_one{OpFOrdGreaterThan(bool_id, var, one)};
        return OpSelect(f32_id, above_one, zero, var);
    }
    case ProcTexClamp::ToEdge:
        return OpFMin(f32_id, var, one);
    case ProcTexClamp::SymmetricalRepeat:
        return OpFract(f32_id, var);
    case ProcTexClamp::MirroredRepeat: {
        // Even integer parts keep the fraction, odd ones mirror it (1 - fract)
        const Id fract{OpFract(f32_id, var)};
        const Id int_part{OpConvertFToS(i32_id, var)};
        const Id parity{OpSMod(i32_id, int_part, ConstS32(2))};
        const Id is_even{OpIEqual(bool_id, parity, ConstS32(0))};
        const Id mirrored{OpFSub(f32_id, one, fract)};
        return OpSelect(f32_id, is_even, fract, mirrored);
    }
    case ProcTexClamp::Pulse: {
        // Step function with the threshold at 0.5
        const Id above_half{OpFOrdGreaterThan(bool_id, var, ConstF32(0.5f))};
        return OpSelect(f32_id, above_half, one, zero);
    }
    default:
        // Unrecognised register value: report it and behave like ToEdge
        LOG_CRITICAL(Render_Vulkan, "Unknown clamp mode {}", mode);
        return OpFMin(f32_id, var, one);
    }
}
|
||||||
|
|
||||||
|
/// Emits the code that combines the u and v coordinates into a single value and maps the
/// result through ProcTexLookupLUT.
/// @param combiner Combiner function selecting how u and v are merged
/// @param u        Id of the float u coordinate
/// @param v        Id of the float v coordinate
/// @param offset   Id of the integer LUT offset forwarded to ProcTexLookupLUT
/// @return Id of the float returned by the LUT lookup
Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id v, Id offset) {
    const Id half{ConstF32(0.5f)};

    // u*u + v*v, emitted up front and shared by the Add2 / SqrtAdd2 / RMax combiners
    const Id v_squared{OpFMul(f32_id, v, v)};
    const Id sum_squares{OpFma(f32_id, u, u, v_squared)};

    Id combined{};
    switch (combiner) {
    case ProcTexCombiner::U:
        combined = u;
        break;
    case ProcTexCombiner::U2:
        combined = OpFMul(f32_id, u, u);
        break;
    case ProcTexCombiner::V:
        combined = v;
        break;
    case ProcTexCombiner::V2:
        combined = OpFMul(f32_id, v, v);
        break;
    case ProcTexCombiner::Add: {
        const Id sum{OpFAdd(f32_id, u, v)};
        combined = OpFMul(f32_id, sum, half);
        break;
    }
    case ProcTexCombiner::Add2:
        combined = OpFMul(f32_id, sum_squares, half);
        break;
    case ProcTexCombiner::SqrtAdd2: {
        const Id length{OpSqrt(f32_id, sum_squares)};
        combined = OpFMin(f32_id, length, ConstF32(1.f));
        break;
    }
    case ProcTexCombiner::Min:
        combined = OpFMin(f32_id, u, v);
        break;
    case ProcTexCombiner::Max:
        combined = OpFMax(f32_id, u, v);
        break;
    case ProcTexCombiner::RMax: {
        // min(((u + v) * 0.5 + sqrt(u*u + v*v)) * 0.5, 1.0)
        const Id sum{OpFAdd(f32_id, u, v)};
        const Id length{OpSqrt(f32_id, sum_squares)};
        const Id r{OpFma(f32_id, sum, half, length)};
        const Id halved{OpFMul(f32_id, r, half)};
        combined = OpFMin(f32_id, halved, ConstF32(1.f));
        break;
    }
    default:
        // Unrecognised register value: report it and feed 0.0 to the LUT
        LOG_CRITICAL(Render_Vulkan, "Unknown combiner {}", combiner);
        combined = ConstF32(0.f);
        break;
    }

    return ProcTexLookupLUT(offset, combined);
}
|
||||||
|
|
||||||
|
void FragmentModule::DefineProcTexSampler() {
|
||||||
|
const Id func_type{TypeFunction(vec_ids.Get(4))};
|
||||||
|
proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
|
||||||
|
AddLabel(OpLabel());
|
||||||
|
|
||||||
|
// Define noise tables at the beginning of the function
|
||||||
|
if (config.state.proctex.noise_enable) {
|
||||||
|
noise1d_table = DefineVar<false>(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function);
|
||||||
|
noise2d_table = DefineVar<false>(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function);
|
||||||
|
}
|
||||||
|
lut_offsets = DefineVar<false>(TypeArray(i32_id, ConstU32(8u)), spv::StorageClass::Function);
|
||||||
|
|
||||||
|
Id uv{};
|
||||||
|
if (config.state.proctex.coord < 3) {
|
||||||
|
Id texcoord_id{};
|
||||||
|
switch (config.state.proctex.coord.Value()) {
|
||||||
|
case 0: texcoord_id = texcoord0_id; break;
|
||||||
|
case 1: texcoord_id = texcoord1_id; break;
|
||||||
|
case 2: texcoord_id = texcoord2_id; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
|
||||||
|
uv = OpFAbs(vec_ids.Get(2), texcoord);
|
||||||
|
} else {
|
||||||
|
LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3");
|
||||||
|
uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id));
|
||||||
|
}
|
||||||
|
|
||||||
|
// This LOD formula is the same as the LOD upper limit defined in OpenGL.
|
||||||
|
// f(x, y) <= m_u + m_v + m_w
|
||||||
|
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
|
||||||
|
// Note: this is different from the one normal 2D textures use.
|
||||||
|
const Id uv_1{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), uv))};
|
||||||
|
const Id uv_2{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), uv))};
|
||||||
|
const Id duv{OpFMax(vec_ids.Get(2), uv_1, uv_2)};
|
||||||
|
|
||||||
|
// unlike normal texture, the bias is inside the log2
|
||||||
|
const Id proctex_bias{GetShaderDataMember(f32_id, ConstS32(16))};
|
||||||
|
const Id bias{OpFMul(f32_id, ConstF32(static_cast<f32>(config.state.proctex.lut_width)), proctex_bias)};
|
||||||
|
const Id duv_xy{OpFAdd(f32_id, OpCompositeExtract(f32_id, duv, 0), OpCompositeExtract(f32_id, duv, 1))};
|
||||||
|
|
||||||
|
Id lod{OpLog2(f32_id, OpFMul(f32_id, OpFAbs(f32_id, bias), duv_xy))};
|
||||||
|
lod = OpSelect(f32_id, OpFOrdEqual(bool_id, proctex_bias, ConstF32(0.f)), ConstF32(0.f), lod);
|
||||||
|
lod = OpFClamp(f32_id, lod, ConstF32(std::max(0.0f, static_cast<float>(config.state.proctex.lod_min))),
|
||||||
|
ConstF32(std::min(7.0f, static_cast<float>(config.state.proctex.lod_max))));
|
||||||
|
|
||||||
|
// Get shift offset before noise generation
|
||||||
|
const Id u_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 1),
|
||||||
|
config.state.proctex.u_shift,
|
||||||
|
config.state.proctex.u_clamp)};
|
||||||
|
const Id v_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 0),
|
||||||
|
config.state.proctex.v_shift,
|
||||||
|
config.state.proctex.v_clamp)};
|
||||||
|
|
||||||
|
// Generate noise
|
||||||
|
if (config.state.proctex.noise_enable) {
|
||||||
|
const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(22))};
|
||||||
|
const Id noise_coef{ProcTexNoiseCoef(uv)};
|
||||||
|
uv = OpFAdd(vec_ids.Get(2), uv, OpVectorTimesScalar(vec_ids.Get(2), proctex_noise_a, noise_coef));
|
||||||
|
uv = OpFAbs(vec_ids.Get(2), uv);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shift
|
||||||
|
Id u{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 0), u_shift)};
|
||||||
|
Id v{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 1), v_shift)};
|
||||||
|
|
||||||
|
// Clamp
|
||||||
|
u = AppendProcTexClamp(u, config.state.proctex.u_clamp);
|
||||||
|
v = AppendProcTexClamp(v, config.state.proctex.v_clamp);
|
||||||
|
|
||||||
|
// Combine and map
|
||||||
|
const Id proctex_color_map_offset{GetShaderDataMember(i32_id, ConstS32(12))};
|
||||||
|
const Id lut_coord{AppendProcTexCombineAndMap(config.state.proctex.color_combiner,
|
||||||
|
u, v, proctex_color_map_offset)};
|
||||||
|
|
||||||
|
Id final_color{};
|
||||||
|
switch (config.state.proctex.lut_filter) {
|
||||||
|
case ProcTexFilter::Linear:
|
||||||
|
case ProcTexFilter::Nearest: {
|
||||||
|
final_color = SampleProcTexColor(lut_coord, ConstS32(0));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ProcTexFilter::NearestMipmapNearest:
|
||||||
|
case ProcTexFilter::LinearMipmapNearest: {
|
||||||
|
final_color = SampleProcTexColor(lut_coord, OpConvertFToS(i32_id, OpRound(f32_id, lod)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ProcTexFilter::NearestMipmapLinear:
|
||||||
|
case ProcTexFilter::LinearMipmapLinear: {
|
||||||
|
const Id lod_i{OpConvertFToS(i32_id, lod)};
|
||||||
|
const Id lod_f{OpFract(f32_id, lod)};
|
||||||
|
const Id color1{SampleProcTexColor(lut_coord, lod_i)};
|
||||||
|
const Id color2{SampleProcTexColor(lut_coord, OpIAdd(i32_id, lod_i, ConstS32(1)))};
|
||||||
|
final_color = OpFMix(f32_id, color1, color2, lod_f);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.state.proctex.separate_alpha) {
|
||||||
|
const Id proctex_alpha_map_offset{GetShaderDataMember(i32_id, ConstS32(13))};
|
||||||
|
const Id final_alpha{AppendProcTexCombineAndMap(config.state.proctex.alpha_combiner, u, v,
|
||||||
|
proctex_alpha_map_offset)};
|
||||||
|
final_color = OpCompositeConstruct(vec_ids.Get(4), final_color, final_alpha);
|
||||||
|
}
|
||||||
|
|
||||||
|
OpReturnValue(final_color);
|
||||||
|
OpFunctionEnd();
|
||||||
|
}
|
||||||
|
|
||||||
Id FragmentModule::Byteround(Id variable_id, u32 size) {
|
Id FragmentModule::Byteround(Id variable_id, u32 size) {
|
||||||
if (size > 1) {
|
if (size > 1) {
|
||||||
const Id scaled_vec_id{OpVectorTimesScalar(vec_ids.Get(size), variable_id, ConstF32(255.f))};
|
const Id scaled_vec_id{OpVectorTimesScalar(vec_ids.Get(size), variable_id, ConstF32(255.f))};
|
||||||
@@ -712,6 +915,121 @@ Id FragmentModule::Byteround(Id variable_id, u32 size) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a lookup into the two-channel LUT texel buffer (texture_buffer_lut_rg).
/// The coordinate is scaled by 128 and split into a floor clamped to [0, 127] and the
/// remainder; the fetched texel's second channel times the remainder is added to its first
/// channel, and the sum is clamped to [0, 1].
/// @param offset Id of the integer offset of the table inside the texel buffer
/// @param coord  Id of the float lookup coordinate
/// @return Id of the resulting float
Id FragmentModule::ProcTexLookupLUT(Id offset, Id coord) {
    const Id scaled{OpFMul(f32_id, coord, ConstF32(128.f))};
    const Id floored{OpFloor(f32_id, scaled)};
    const Id index_i{OpFClamp(f32_id, floored, ConstF32(0.f), ConstF32(127.0f))};
    const Id index_f{OpFSub(f32_id, scaled, index_i)};
    const Id texel_pos{OpIAdd(i32_id, OpConvertFToS(i32_id, index_i), offset)};

    // Only load the texture buffer lut once
    if (!Sirit::ValidId(texture_buffer_lut_rg)) {
        const Id sampled_image{TypeSampledImage(image_buffer_id)};
        texture_buffer_lut_rg = OpLoad(sampled_image, texture_buffer_lut_rg_id);
    }

    const Id lut_image{OpImage(image_buffer_id, texture_buffer_lut_rg)};
    const Id entry{OpImageFetch(vec_ids.Get(4), lut_image, texel_pos)};
    const Id entry_value{OpCompositeExtract(f32_id, entry, 0)};
    const Id entry_diff{OpCompositeExtract(f32_id, entry, 1)};
    const Id interpolated{OpFma(f32_id, entry_diff, index_f, entry_value)};
    return OpFClamp(f32_id, interpolated, ConstF32(0.f), ConstF32(1.f));
}
|
||||||
|
|
||||||
|
/// Emits the code that computes the proctex noise coefficient for a vec2 coordinate.
/// Four pseudo-random corner values of the grid cell containing the coordinate are weighted
/// and then blended with factors read from the noise LUT.
/// @param x Id of the vec2 coordinate
/// @return Id of the float noise coefficient
Id FragmentModule::ProcTexNoiseCoef(Id x) {
    const Id vec2_id{vec_ids.Get(2)};

    // ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]
    const auto rand_1d = [&](Id v) -> Id {
        InitTableS32(noise1d_table, 0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11);
        const Id elem_ptr_type{TypePointer(spv::StorageClass::Function, i32_id)};
        const Id remainder{OpSMod(i32_id, v, ConstS32(9))};
        const Id remainder_plus_two{OpIAdd(i32_id, remainder, ConstS32(2))};
        const Id tripled{OpIMul(i32_id, remainder_plus_two, ConstS32(3))};
        const Id lhs{OpBitwiseAnd(i32_id, tripled, ConstS32(0xF))};
        const Id quotient{OpSDiv(i32_id, v, ConstS32(9))};
        const Id slot{OpBitwiseAnd(i32_id, quotient, ConstS32(0xF))};
        const Id slot_ptr{OpAccessChain(elem_ptr_type, noise1d_table, slot)};
        const Id rhs{OpLoad(i32_id, slot_ptr)};
        return OpBitwiseXor(i32_id, lhs, rhs);
    };

    // Hashes both components of a grid point into a float computed as v2 * (2 / 15) - 1
    const auto rand_2d = [&](Id point) -> Id {
        InitTableS32(noise2d_table, 10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14);
        const Id elem_ptr_type{TypePointer(spv::StorageClass::Function, i32_id)};
        const Id px{OpConvertFToS(i32_id, OpCompositeExtract(f32_id, point, 0))};
        const Id py{OpConvertFToS(i32_id, OpCompositeExtract(f32_id, point, 1))};

        const Id u2{rand_1d(px)};
        const Id low_bits{OpBitwiseAnd(i32_id, u2, ConstS32(3))};
        const Id low_bits_is_one{OpIEqual(bool_id, low_bits, ConstS32(1))};
        const Id table_entry{OpLoad(i32_id, OpAccessChain(elem_ptr_type, noise2d_table, u2))};

        Id v2{rand_1d(py)};
        // v2 += ((u2 & 3) == 1) ? 4 : 0
        const Id bump{OpSelect(i32_id, low_bits_is_one, ConstS32(4), ConstS32(0))};
        v2 = OpIAdd(i32_id, v2, bump);
        // v2 ^= (u2 & 1) * 6
        const Id odd_bit{OpBitwiseAnd(i32_id, u2, ConstS32(1))};
        v2 = OpBitwiseXor(i32_id, v2, OpIMul(i32_id, odd_bit, ConstS32(6)));
        // v2 += 10 + u2
        v2 = OpIAdd(i32_id, v2, OpIAdd(i32_id, u2, ConstS32(10)));
        v2 = OpBitwiseAnd(i32_id, v2, ConstS32(0xF));
        v2 = OpBitwiseXor(i32_id, v2, table_entry);

        const Id v2_f{OpConvertSToF(f32_id, v2)};
        return OpFma(f32_id, v2_f, ConstF32(2.f / 15.f), ConstF32(-1.f));
    };

    // grid = 9 * noise_f * abs(x + noise_p), split into integer cell and fraction
    const Id proctex_noise_f{GetShaderDataMember(vec2_id, ConstS32(21))};
    const Id proctex_noise_p{GetShaderDataMember(vec2_id, ConstS32(23))};
    const Id scaled_freq{OpVectorTimesScalar(vec2_id, proctex_noise_f, ConstF32(9.f))};
    const Id shifted{OpFAbs(vec2_id, OpFAdd(vec2_id, x, proctex_noise_p))};
    const Id grid{OpFMul(vec2_id, scaled_freq, shifted)};
    const Id cell{OpFloor(vec2_id, grid)};
    const Id frac{OpFSub(vec2_id, grid, cell)};
    const Id frac_x{OpCompositeExtract(f32_id, frac, 0)};
    const Id frac_y{OpCompositeExtract(f32_id, frac, 1)};

    // Corner values weighted by (frac.x + frac.y) minus 0, 1, 1 and 2 respectively
    const Id frac_sum{OpFAdd(f32_id, frac_x, frac_y)};
    const Id g0{OpFMul(f32_id, rand_2d(cell), frac_sum)};

    const Id frac_sum_minus_one{OpFSub(f32_id, frac_sum, ConstF32(1.f))};
    const Id corner_10{OpFAdd(vec2_id, cell, ConstF32(1.f, 0.f))};
    const Id g1{OpFMul(f32_id, rand_2d(corner_10), frac_sum_minus_one)};
    const Id corner_01{OpFAdd(vec2_id, cell, ConstF32(0.f, 1.f))};
    const Id g2{OpFMul(f32_id, rand_2d(corner_01), frac_sum_minus_one)};

    const Id frac_sum_minus_two{OpFSub(f32_id, frac_sum, ConstF32(2.f))};
    const Id corner_11{OpFAdd(vec2_id, cell, ConstF32(1.f, 1.f))};
    const Id g3{OpFMul(f32_id, rand_2d(corner_11), frac_sum_minus_two)};

    // Blend factors come from the noise LUT
    const Id proctex_noise_lut_offset{GetShaderDataMember(i32_id, ConstS32(11))};
    const Id x_noise{ProcTexLookupLUT(proctex_noise_lut_offset, frac_x)};
    const Id y_noise{ProcTexLookupLUT(proctex_noise_lut_offset, frac_y)};

    const Id row0{OpFMix(f32_id, g0, g1, x_noise)};
    const Id row1{OpFMix(f32_id, g2, g3, x_noise)};
    return OpFMix(f32_id, row0, row1, y_noise);
}
|
||||||
|
|
||||||
|
/// Emits the code that fetches a colour from the rgba LUT texel buffer for one mipmap level.
/// @param lut_coord Id of the float LUT coordinate
/// @param level     Id of the integer mipmap level, used to index the 8-entry offset table
/// @return Id of the fetched vec4 colour, or an empty Id for an unhandled filter mode
Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) {
    const Id vec4_id{vec_ids.Get(4)};

    // Width of this level: lut_width >> level
    const Id level_width{
        OpShiftRightArithmetic(i32_id, ConstS32(config.state.proctex.lut_width), level)};
    const Id offset_ptr_type{TypePointer(spv::StorageClass::Function, i32_id)};

    // Offsets for level 4-7 seem to be hardcoded
    InitTableS32(lut_offsets, config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
                 config.state.proctex.lut_offset2, config.state.proctex.lut_offset3, 0xF0, 0xF8,
                 0xFC, 0xFE);
    const Id level_offset{OpLoad(i32_id, OpAccessChain(offset_ptr_type, lut_offsets, level))};

    // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
    const Id last_index{OpISub(i32_id, level_width, ConstS32(1))};
    const Id scaled_coord{OpFMul(f32_id, lut_coord, OpConvertSToF(f32_id, last_index))};

    // Only load the texture buffer lut once
    if (!Sirit::ValidId(texture_buffer_lut_rgba)) {
        const Id sampled_image{TypeSampledImage(image_buffer_id)};
        texture_buffer_lut_rgba = OpLoad(sampled_image, texture_buffer_lut_rgba_id);
    }

    const Id proctex_lut_offset{GetShaderDataMember(i32_id, ConstS32(14))};
    const Id lut_rgba{OpImage(image_buffer_id, texture_buffer_lut_rgba)};

    switch (config.state.proctex.lut_filter) {
    case ProcTexFilter::Linear:
    case ProcTexFilter::LinearMipmapLinear:
    case ProcTexFilter::LinearMipmapNearest: {
        // value texel + fract(coord) * difference texel
        const Id coord_int{OpConvertFToS(i32_id, scaled_coord)};
        const Id texel_index{OpIAdd(i32_id, coord_int, level_offset)};
        const Id weight{OpFract(f32_id, scaled_coord)};
        const Id proctex_diff_lut_offset{GetShaderDataMember(i32_id, ConstS32(15))};
        const Id value_pos{OpIAdd(i32_id, texel_index, proctex_lut_offset)};
        const Id diff_pos{OpIAdd(i32_id, texel_index, proctex_diff_lut_offset)};
        const Id value_texel{OpImageFetch(vec4_id, lut_rgba, value_pos)};
        const Id diff_texel{OpImageFetch(vec4_id, lut_rgba, diff_pos)};
        const Id weighted_diff{OpVectorTimesScalar(vec4_id, diff_texel, weight)};
        return OpFAdd(vec4_id, value_texel, weighted_diff);
    }
    case ProcTexFilter::Nearest:
    case ProcTexFilter::NearestMipmapLinear:
    case ProcTexFilter::NearestMipmapNearest: {
        // Round the offset coordinate to the nearest texel
        const Id shifted_coord{OpFAdd(f32_id, scaled_coord, OpConvertSToF(f32_id, level_offset))};
        const Id rounded{OpConvertFToS(i32_id, OpRound(f32_id, shifted_coord))};
        const Id texel_pos{OpIAdd(i32_id, rounded, proctex_lut_offset)};
        return OpImageFetch(vec4_id, lut_rgba, texel_pos);
    }
    }

    return Id{};
}
|
||||||
|
|
||||||
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
|
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
|
||||||
// Only load the texture buffer lut once
|
// Only load the texture buffer lut once
|
||||||
if (!Sirit::ValidId(texture_buffer_lut_lf)) {
|
if (!Sirit::ValidId(texture_buffer_lut_lf)) {
|
||||||
@@ -1022,8 +1340,6 @@ void FragmentModule::DefineInterface() {
|
|||||||
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
|
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int i = 0;
|
|
||||||
|
|
||||||
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
|
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
|
||||||
FragmentModule module{config};
|
FragmentModule module{config};
|
||||||
module.Generate();
|
module.Generate();
|
||||||
|
@@ -41,6 +41,9 @@ public:
|
|||||||
/// Writes the code to emulate the specified TEV stage
|
/// Writes the code to emulate the specified TEV stage
|
||||||
void WriteTevStage(s32 index);
|
void WriteTevStage(s32 index);
|
||||||
|
|
||||||
|
/// Defines the tex3 proctex sampling function
|
||||||
|
void DefineProcTexSampler();
|
||||||
|
|
||||||
/// Writes the if-statement condition used to evaluate alpha testing.
|
/// Writes the if-statement condition used to evaluate alpha testing.
|
||||||
/// Returns true if the fragment was discarded
|
/// Returns true if the fragment was discarded
|
||||||
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
|
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
|
||||||
@@ -51,9 +54,29 @@ public:
|
|||||||
/// Samples the current fragment texel from shadow plane
|
/// Samples the current fragment texel from shadow plane
|
||||||
[[nodiscard]] Id SampleShadow();
|
[[nodiscard]] Id SampleShadow();
|
||||||
|
|
||||||
|
[[nodiscard]] Id AppendProcTexShiftOffset(Id v, Pica::TexturingRegs::ProcTexShift mode,
|
||||||
|
Pica::TexturingRegs::ProcTexClamp clamp_mode);
|
||||||
|
|
||||||
|
[[nodiscard]] Id AppendProcTexClamp(Id var, Pica::TexturingRegs::ProcTexClamp mode);
|
||||||
|
|
||||||
|
[[nodiscard]] Id AppendProcTexCombineAndMap(Pica::TexturingRegs::ProcTexCombiner combiner,
|
||||||
|
Id u, Id v, Id offset);
|
||||||
|
|
||||||
/// Rounds the provided variable to the nearest 1/255th
|
/// Rounds the provided variable to the nearest 1/255th
|
||||||
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
|
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
|
||||||
|
|
||||||
|
/// LUT sampling utility
|
||||||
|
/// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
|
||||||
|
/// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
|
||||||
|
/// value entries and difference entries.
|
||||||
|
[[nodiscard]] Id ProcTexLookupLUT(Id offset, Id coord);
|
||||||
|
|
||||||
|
/// Generates random noise with proctex
|
||||||
|
[[nodiscard]] Id ProcTexNoiseCoef(Id x);
|
||||||
|
|
||||||
|
/// Samples a color value from the rgba texture lut
|
||||||
|
[[nodiscard]] Id SampleProcTexColor(Id lut_coord, Id level);
|
||||||
|
|
||||||
/// Lookups the lighting LUT at the provided lut_index
|
/// Lookups the lighting LUT at the provided lut_index
|
||||||
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
|
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
|
||||||
|
|
||||||
@@ -73,9 +96,15 @@ public:
|
|||||||
|
|
||||||
/// Writes the combiner function for the alpha component for the specified TEV stage operation
|
/// Writes the combiner function for the alpha component for the specified TEV stage operation
|
||||||
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
|
||||||
bool dump_shader{false};
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// Creates a constant array of integers
|
||||||
|
template <typename... T>
|
||||||
|
void InitTableS32(Id table, T... elems) {
|
||||||
|
const Id table_const{ConstS32(elems...)};
|
||||||
|
OpStore(table, table_const);
|
||||||
|
};
|
||||||
|
|
||||||
/// Loads the member specified from the shader_data uniform struct
|
/// Loads the member specified from the shader_data uniform struct
|
||||||
template <typename... Ids>
|
template <typename... Ids>
|
||||||
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
|
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
|
||||||
@@ -114,9 +143,11 @@ private:
|
|||||||
return uniform_id;
|
return uniform_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool global = true>
|
||||||
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
|
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
|
||||||
const Id pointer_type_id{TypePointer(storage_class, type)};
|
const Id pointer_type_id{TypePointer(storage_class, type)};
|
||||||
return AddGlobalVariable(pointer_type_id, storage_class);
|
return global ? AddGlobalVariable(pointer_type_id, storage_class)
|
||||||
|
: AddLocalVariable(pointer_type_id, storage_class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the id of a signed integer constant of value
|
/// Returns the id of a signed integer constant of value
|
||||||
@@ -126,10 +157,11 @@ private:
|
|||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
[[nodiscard]] Id ConstU32(Args&&... values) {
|
[[nodiscard]] Id ConstU32(Args&&... values) {
|
||||||
constexpr auto size = sizeof...(values);
|
constexpr u32 size = static_cast<u32>(sizeof...(values));
|
||||||
static_assert(size >= 2 && size <= 4);
|
static_assert(size >= 2);
|
||||||
const std::array constituents{Constant(u32_id, values)...};
|
const std::array constituents{Constant(u32_id, values)...};
|
||||||
return ConstantComposite(uvec_ids.Get(size), constituents);
|
const Id type = size <= 4 ? uvec_ids.Get(size) : TypeArray(u32_id, ConstU32(size));
|
||||||
|
return ConstantComposite(type, constituents);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the id of a signed integer constant of value
|
/// Returns the id of a signed integer constant of value
|
||||||
@@ -139,10 +171,11 @@ private:
|
|||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
[[nodiscard]] Id ConstS32(Args&&... values) {
|
[[nodiscard]] Id ConstS32(Args&&... values) {
|
||||||
constexpr auto size = sizeof...(values);
|
constexpr u32 size = static_cast<u32>(sizeof...(values));
|
||||||
static_assert(size >= 2 && size <= 4);
|
static_assert(size >= 2);
|
||||||
const std::array constituents{Constant(i32_id, values)...};
|
const std::array constituents{Constant(i32_id, values)...};
|
||||||
return ConstantComposite(ivec_ids.Get(size), constituents);
|
const Id type = size <= 4 ? ivec_ids.Get(size) : TypeArray(i32_id, ConstU32(size));
|
||||||
|
return ConstantComposite(type, constituents);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the id of a float constant of value
|
/// Returns the id of a float constant of value
|
||||||
@@ -152,10 +185,11 @@ private:
|
|||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
[[nodiscard]] Id ConstF32(Args... values) {
|
[[nodiscard]] Id ConstF32(Args... values) {
|
||||||
constexpr auto size = sizeof...(values);
|
constexpr u32 size = static_cast<u32>(sizeof...(values));
|
||||||
static_assert(size >= 2 && size <= 4);
|
static_assert(size >= 2);
|
||||||
const std::array constituents{Constant(f32_id, values)...};
|
const std::array constituents{Constant(f32_id, values)...};
|
||||||
return ConstantComposite(vec_ids.Get(size), constituents);
|
const Id type = size <= 4 ? vec_ids.Get(size) : TypeArray(f32_id, ConstU32(size));
|
||||||
|
return ConstantComposite(type, constituents);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DefineArithmeticTypes();
|
void DefineArithmeticTypes();
|
||||||
@@ -215,6 +249,8 @@ private:
|
|||||||
Id shadow_texture_nz_id{};
|
Id shadow_texture_nz_id{};
|
||||||
|
|
||||||
Id texture_buffer_lut_lf{};
|
Id texture_buffer_lut_lf{};
|
||||||
|
Id texture_buffer_lut_rg{};
|
||||||
|
Id texture_buffer_lut_rgba{};
|
||||||
|
|
||||||
Id rounded_primary_color{};
|
Id rounded_primary_color{};
|
||||||
Id primary_fragment_color{};
|
Id primary_fragment_color{};
|
||||||
@@ -229,6 +265,11 @@ private:
|
|||||||
Id alpha_results_1{};
|
Id alpha_results_1{};
|
||||||
Id alpha_results_2{};
|
Id alpha_results_2{};
|
||||||
Id alpha_results_3{};
|
Id alpha_results_3{};
|
||||||
|
|
||||||
|
Id proctex_func{};
|
||||||
|
Id noise1d_table{};
|
||||||
|
Id noise2d_table{};
|
||||||
|
Id lut_offsets{};
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -179,8 +179,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
|||||||
includer)) {
|
includer)) {
|
||||||
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
|
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
|
||||||
shader->getInfoDebugLog());
|
shader->getInfoDebugLog());
|
||||||
LOG_CRITICAL(Render_Vulkan, "{}", code);
|
fmt::print("{}", code);
|
||||||
ASSERT(false);
|
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user