2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>

* libc/machine/aarch64/strnlen.S: Correct arithmetic for
	argument N values close to the maximum value representable
	in an unsigned 64-bit integer.
This commit is contained in:
Marcus Shawcroft 2013-01-17 14:52:37 +00:00
parent 211dd84b83
commit 78f66de6ce
2 changed files with 34 additions and 11 deletions

View File

@ -1,3 +1,9 @@
2013-01-17 Marcus Shawcroft <marcus.shawcroft@linaro.org>
* libc/machine/aarch64/strnlen.S: Correct arithmetic for
argument N values close to the maximum value representable
in an unsigned 64-bit integer.
2013-01-15 Nick Clifton <nickc@redhat.com> 2013-01-15 Nick Clifton <nickc@redhat.com>
* libc/sys/sysnecv850/crt0.S (_start): Enable FPU for the * libc/sys/sysnecv850/crt0.S (_start): Enable FPU for the

View File

@ -85,8 +85,10 @@ def_fn strnlen
bic src, srcin, #15 bic src, srcin, #15
ands tmp1, srcin, #15 ands tmp1, srcin, #15
b.ne .Lmisaligned b.ne .Lmisaligned
add limit_wd, limit, #15 /* Calculate the number of full and partial words -1. */
lsr limit_wd, limit_wd, #4 sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80 /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
can be done in parallel across the entire word. */ can be done in parallel across the entire word. */
@ -107,7 +109,7 @@ def_fn strnlen
bic has_nul2, tmp3, tmp4 bic has_nul2, tmp3, tmp4
subs limit_wd, limit_wd, #1 subs limit_wd, limit_wd, #1
orr tmp1, has_nul1, has_nul2 orr tmp1, has_nul1, has_nul2
ccmp tmp1, #0, #0, ne /* NZCV = 0000 */ ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
b.eq .Lloop b.eq .Lloop
/* End of critical section -- keep to one 64Byte cache line. */ /* End of critical section -- keep to one 64Byte cache line. */
@ -145,23 +147,38 @@ def_fn strnlen
ret ret
.Lmisaligned: .Lmisaligned:
add tmp3, limit, tmp1 /* Deal with a partial first word.
We're doing two things in parallel here;
1) Calculate the number of words (but avoiding overflow if
limit is near ULONG_MAX) - to do this we need to work out
limit + tmp1 - 1 as a 65-bit value before shifting it;
2) Load and mask the initial data words - we force the bytes
before the ones we are interested in to 0xff - this ensures
early bytes will not hit any zero detection. */
sub limit_wd, limit, #1
neg tmp4, tmp1
cmp tmp1, #8 cmp tmp1, #8
neg tmp1, tmp1
ldp data1, data2, [src], #16 and tmp3, limit_wd, #15
add limit_wd, tmp3, #15
lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
mov tmp2, #~0
lsr limit_wd, limit_wd, #4 lsr limit_wd, limit_wd, #4
mov tmp2, #~0
ldp data1, data2, [src], #16
lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
add tmp3, tmp3, tmp1
#ifdef __AARCH64EB__ #ifdef __AARCH64EB__
/* Big-endian. Early bytes are at MSB. */ /* Big-endian. Early bytes are at MSB. */
lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ lsl tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
#else #else
/* Little-endian. Early bytes are at LSB. */ /* Little-endian. Early bytes are at LSB. */
lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ lsr tmp2, tmp2, tmp4 /* Shift (tmp4 & 63). */
#endif #endif
add limit_wd, limit_wd, tmp3, lsr #4
orr data1, data1, tmp2 orr data1, data1, tmp2
orr data2a, data2, tmp2 orr data2a, data2, tmp2
csinv data1, data1, xzr, le csinv data1, data1, xzr, le
csel data2, data2, data2a, le csel data2, data2, data2a, le
b .Lrealigned b .Lrealigned