Merge pull request #3261 from MerryMage/DPH

shader_jit_x64_compiler: Use haddps for horizontal summation
This commit is contained in:
Yuri Kunde Schlesner 2017-12-13 09:09:42 -05:00 committed by GitHub
commit aecd2b85fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 4 additions and 14 deletions

View File

@@ -387,13 +387,8 @@ void JitShader::Compile_DP4(Instruction instr) {
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
addps(SRC1, SRC2);
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
addps(SRC1, SRC2);
haddps(SRC1, SRC1);
haddps(SRC1, SRC1);
Compile_DestEnable(instr, SRC1);
}
@@ -419,13 +414,8 @@ void JitShader::Compile_DPH(Instruction instr) {
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
addps(SRC1, SRC2);
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
addps(SRC1, SRC2);
haddps(SRC1, SRC1);
haddps(SRC1, SRC1);
Compile_DestEnable(instr, SRC1);
}