strlen-armv7.S: Import latest strlen cortex-strings code.
Import the latest version of strlen from the Linaro cortex-strings package. This version is faster across a variety of block sizes and alignments on ARMv7. newlib/ChangeLog: 2013-06-21 Will Newton <will.newton@linaro.org> * libc/machine/arm/strlen-armv7.S: Import latest strlen code from Linaro cortex-strings.
This commit is contained in:
parent
a1a7a74e6b
commit
c8af057907
@ -1,3 +1,8 @@
|
|||||||
|
2013-06-21 Will Newton <will.newton@linaro.org>
|
||||||
|
|
||||||
|
* libc/machine/arm/strlen-armv7.S: Import latest strlen
|
||||||
|
code from Linaro cortex-strings.
|
||||||
|
|
||||||
2013-06-21 Will Newton <will.newton@linaro.org>
|
2013-06-21 Will Newton <will.newton@linaro.org>
|
||||||
|
|
||||||
* MAINTAINERS: Add Will Newton to Write After Approval.
|
* MAINTAINERS: Add Will Newton to Write After Approval.
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (c) 2010-2011, Linaro Limited
|
/* Copyright (c) 2010-2011,2013 Linaro Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -28,100 +28,130 @@
|
|||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
Written by Dave Gilbert <david.gilbert@linaro.org>
|
Assumes:
|
||||||
|
ARMv6T2, AArch32
|
||||||
This strlen routine is optimised on a Cortex-A9 and should work on
|
*/
|
||||||
all ARMv7 processors. This routine is reasonably fast for short
|
|
||||||
strings, but is probably slower than a simple implementation if all
|
|
||||||
your strings are very short */
|
|
||||||
|
|
||||||
@ 2011-02-08 david.gilbert@linaro.org
|
|
||||||
@ Extracted from local git 6848613a
|
|
||||||
@ 2011-10-13 david.gilbert@linaro.org
|
|
||||||
@ Extracted from cortex-strings bzr rev 63
|
|
||||||
@ Integrate to newlib, flip to ldrd
|
|
||||||
@ Pull in Endian macro from my memchr
|
|
||||||
|
|
||||||
#include "arm_asm.h"
|
#include "arm_asm.h"
|
||||||
|
|
||||||
@ NOTE: This ifdef MUST match the ones in arm/strlen.c
|
/* NOTE: This ifdef MUST match the ones in arm/strlen.c
|
||||||
@ We fallback to the one in arm/strlen.c for size optimised or
|
We fall back to the one in arm/strlen.c for size optimised or
|
||||||
@ for older arch's
|
for older architectures. */
|
||||||
#if defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__) && \
|
#if defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__) && \
|
||||||
!(defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
|
!(defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
|
||||||
(defined (__thumb__) && !defined (__thumb2__)))
|
(defined (__thumb__) && !defined (__thumb2__)))
|
||||||
|
|
||||||
@ this lets us check a flag in a 00/ff byte easily in either endianness
|
.macro def_fn f p2align=0
|
||||||
|
.text
|
||||||
|
.p2align \p2align
|
||||||
|
.global \f
|
||||||
|
.type \f, %function
|
||||||
|
\f:
|
||||||
|
.endm
|
||||||
|
|
||||||
#ifdef __ARMEB__
|
#ifdef __ARMEB__
|
||||||
#define CHARTSTMASK(c) 1<<(31-(c*8))
|
#define S2LO lsl
|
||||||
|
#define S2HI lsr
|
||||||
#else
|
#else
|
||||||
#define CHARTSTMASK(c) 1<<(c*8)
|
#define S2LO lsr
|
||||||
|
#define S2HI lsl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@------------------------------------------------------------------------------
|
/* This code requires Thumb. */
|
||||||
|
.thumb
|
||||||
.syntax unified
|
.syntax unified
|
||||||
.arch armv7-a
|
|
||||||
|
|
||||||
.thumb_func
|
/* Parameters and result. */
|
||||||
.align 2
|
#define srcin r0
|
||||||
.p2align 4,,15
|
#define result r0
|
||||||
.global strlen
|
|
||||||
.type strlen,%function
|
|
||||||
strlen:
|
|
||||||
@ r0 = string
|
|
||||||
@ returns count of bytes in string not including terminator
|
|
||||||
mov r1, r0
|
|
||||||
push { r4,r6 }
|
|
||||||
mvns r6, #0 @ all F
|
|
||||||
movs r4, #0
|
|
||||||
tst r0, #7
|
|
||||||
beq 2f
|
|
||||||
|
|
||||||
1:
|
/* Internal variables. */
|
||||||
ldrb r2, [r1], #1
|
#define src r1
|
||||||
tst r1, #7 @ Hit alignment yet?
|
#define data1a r2
|
||||||
cbz r2, 10f @ Exit if we found the 0
|
#define data1b r3
|
||||||
bne 1b
|
#define const_m1 r12
|
||||||
|
#define const_0 r4
|
||||||
|
#define tmp1 r4 /* Overlaps const_0 */
|
||||||
|
#define tmp2 r5
|
||||||
|
|
||||||
@ So we're now aligned
|
def_fn strlen p2align=6
|
||||||
2:
|
pld [srcin, #0]
|
||||||
ldrd r2,r3,[r1],#8
|
strd r4, r5, [sp, #-8]!
|
||||||
uadd8 r2, r2, r6 @ Par add 0xff - sets the GE bits for bytes!=0
|
bic src, srcin, #7
|
||||||
sel r2, r4, r6 @ bytes are 00 for none-00 bytes,
|
mvn const_m1, #0
|
||||||
@ or ff for 00 bytes - NOTE INVERSION
|
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
|
||||||
uadd8 r3, r3, r6 @ Par add 0xff - sets the GE bits for bytes!=0
|
pld [src, #32]
|
||||||
sel r3, r2, r6 @ chained...bytes are 00 for none-00 bytes,
|
bne.w .Lmisaligned8
|
||||||
@ or ff for 00 bytes - NOTE INVERSION
|
mov const_0, #0
|
||||||
cmp r3, #0
|
mov result, #-8
|
||||||
beq 2b
|
.Lloop_aligned:
|
||||||
|
/* Bytes 0-7. */
|
||||||
|
ldrd data1a, data1b, [src]
|
||||||
|
pld [src, #64]
|
||||||
|
add result, result, #8
|
||||||
|
.Lstart_realigned:
|
||||||
|
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||||
|
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||||
|
uadd8 data1b, data1b, const_m1
|
||||||
|
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||||
|
cbnz data1b, .Lnull_found
|
||||||
|
|
||||||
strlenendtmp:
|
/* Bytes 8-15. */
|
||||||
@ One (or more) of the bytes we loaded was 0 - but which one?
|
ldrd data1a, data1b, [src, #8]
|
||||||
@ r2 has the mask corresponding to the first loaded word
|
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||||
@ r3 has a combined mask of the two words - but if r2 was all-non 0
|
add result, result, #8
|
||||||
@ then it's just the 2nd words
|
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||||
cmp r2, #0
|
uadd8 data1b, data1b, const_m1
|
||||||
itte eq
|
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||||
moveq r2, r3 @ the end is in the 2nd word
|
cbnz data1b, .Lnull_found
|
||||||
subeq r1,r1,#3
|
|
||||||
subne r1,r1,#7
|
|
||||||
|
|
||||||
@ r1 currently points to the 2nd byte of the word containing the 0
|
/* Bytes 16-23. */
|
||||||
tst r2, # CHARTSTMASK(0) @ 1st character
|
ldrd data1a, data1b, [src, #16]
|
||||||
bne 10f
|
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||||
adds r1,r1,#1
|
add result, result, #8
|
||||||
tst r2, # CHARTSTMASK(1) @ 2nd character
|
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||||
ittt eq
|
uadd8 data1b, data1b, const_m1
|
||||||
addeq r1,r1,#1
|
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||||
tsteq r2, # (3<<15) @ 2nd & 3rd character
|
cbnz data1b, .Lnull_found
|
||||||
@ If not the 3rd must be the last one
|
|
||||||
addeq r1,r1,#1
|
|
||||||
|
|
||||||
10:
|
/* Bytes 24-31. */
|
||||||
@ r0 is still at the beginning, r1 is pointing 1 byte after the nul
|
ldrd data1a, data1b, [src, #24]
|
||||||
sub r0, r1, r0
|
add src, src, #32
|
||||||
subs r0, r0, #1
|
uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
|
||||||
pop { r4, r6 }
|
add result, result, #8
|
||||||
|
sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
|
||||||
|
uadd8 data1b, data1b, const_m1
|
||||||
|
sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
|
||||||
|
cmp data1b, #0
|
||||||
|
beq .Lloop_aligned
|
||||||
|
|
||||||
|
.Lnull_found:
|
||||||
|
cmp data1a, #0
|
||||||
|
itt eq
|
||||||
|
addeq result, result, #4
|
||||||
|
moveq data1a, data1b
|
||||||
|
#ifndef __ARMEB__
|
||||||
|
rev data1a, data1a
|
||||||
|
#endif
|
||||||
|
clz data1a, data1a
|
||||||
|
ldrd r4, r5, [sp], #8
|
||||||
|
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
|
.Lmisaligned8:
|
||||||
|
ldrd data1a, data1b, [src]
|
||||||
|
and tmp2, tmp1, #3
|
||||||
|
rsb result, tmp1, #0
|
||||||
|
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
|
||||||
|
tst tmp1, #4
|
||||||
|
pld [src, #64]
|
||||||
|
S2HI tmp2, const_m1, tmp2
|
||||||
|
orn data1a, data1a, tmp2
|
||||||
|
itt ne
|
||||||
|
ornne data1b, data1b, tmp2
|
||||||
|
movne data1a, const_m1
|
||||||
|
mov const_0, #0
|
||||||
|
b .Lstart_realigned
|
||||||
|
.size strlen, . - strlen
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user