Optimize strchr for x86.
* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned searches aren't penalized. Special-case searching for 0.
This commit is contained in:
parent
804c0cc6d0
commit
4962a9453a
@ -1,3 +1,9 @@
|
|||||||
|
2008-05-21 Eric Blake <ebb9@byu.net>
|
||||||
|
|
||||||
|
Optimize strchr for x86.
|
||||||
|
* libc/machine/i386/strchr.S (strchr): Pre-align data so unaligned
|
||||||
|
searches aren't penalized. Special-case searching for 0.
|
||||||
|
|
||||||
2008-05-20 Nick Clifton <nickc@redhat.com>
|
2008-05-20 Nick Clifton <nickc@redhat.com>
|
||||||
|
|
||||||
* libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak).
|
* libc/sys/sysnecv850/crt0.S (___dso_handle): Define (weak).
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* ====================================================
|
* ====================================================
|
||||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission to use, copy, modify, and distribute this
|
* Permission to use, copy, modify, and distribute this
|
||||||
* software is freely granted, provided that this notice
|
* software is freely granted, provided that this notice
|
||||||
@ -21,14 +21,45 @@ SYM (strchr):
|
|||||||
pushl ebx
|
pushl ebx
|
||||||
xorl ebx,ebx
|
xorl ebx,ebx
|
||||||
movl 8(ebp),edi
|
movl 8(ebp),edi
|
||||||
movb 12(ebp),bl
|
addb 12(ebp),bl
|
||||||
|
|
||||||
#ifndef __OPTIMIZE_SIZE__
|
#ifndef __OPTIMIZE_SIZE__
|
||||||
/* check if string is aligned, if not do check one byte at a time */
|
/* Special case strchr(p,0). */
|
||||||
|
je L25
|
||||||
|
|
||||||
|
/* Do byte-wise checks until string is aligned. */
|
||||||
test $3,edi
|
test $3,edi
|
||||||
jne L9
|
je L5
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L14
|
||||||
|
cmpb bl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L5
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L14
|
||||||
|
cmpb bl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L5
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L14
|
||||||
|
cmpb bl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
/* create 4 byte mask which is just the desired byte repeated 4 times */
|
/* create 4 byte mask which is just the desired byte repeated 4 times */
|
||||||
|
L5:
|
||||||
movl ebx,ecx
|
movl ebx,ecx
|
||||||
sall $8,ebx
|
sall $8,ebx
|
||||||
subl $4,edi
|
subl $4,edi
|
||||||
@ -49,11 +80,10 @@ L10:
|
|||||||
testl $-2139062144,edx
|
testl $-2139062144,edx
|
||||||
jne L9
|
jne L9
|
||||||
|
|
||||||
movl ebx,eax
|
xorl ebx,ecx
|
||||||
xorl ecx,eax
|
leal -16843009(ecx),edx
|
||||||
leal -16843009(eax),edx
|
notl ecx
|
||||||
notl eax
|
andl ecx,edx
|
||||||
andl eax,edx
|
|
||||||
testl $-2139062144,edx
|
testl $-2139062144,edx
|
||||||
je L10
|
je L10
|
||||||
#endif /* not __OPTIMIZE_SIZE__ */
|
#endif /* not __OPTIMIZE_SIZE__ */
|
||||||
@ -83,3 +113,60 @@ L19:
|
|||||||
leave
|
leave
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
#ifndef __OPTIMIZE_SIZE__
|
||||||
|
/* Special case strchr(p,0). */
|
||||||
|
#if 0
|
||||||
|
/* Disabled: the repnz scasb version below has hideous performance on modern machines. */
|
||||||
|
L25:
|
||||||
|
cld
|
||||||
|
movl $-1,ecx
|
||||||
|
xor eax,eax
|
||||||
|
repnz
|
||||||
|
scasb
|
||||||
|
leal -1(edi),eax
|
||||||
|
jmp L19
|
||||||
|
#endif
|
||||||
|
L25:
|
||||||
|
/* Do byte-wise checks until string is aligned. */
|
||||||
|
test $3,edi
|
||||||
|
je L26
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L26
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L26
|
||||||
|
movl edi,eax
|
||||||
|
movb (eax),cl
|
||||||
|
testb cl,cl
|
||||||
|
je L19
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
L26:
|
||||||
|
subl $4,edi
|
||||||
|
|
||||||
|
/* Loop over aligned 4-byte words w, checking each for a 0 byte: (w - 0x01010101) & ~w & 0x80808080 is nonzero when some byte of w is 0. */
|
||||||
|
.p2align 4,,7
|
||||||
|
L27:
|
||||||
|
addl $4,edi
|
||||||
|
movl (edi),ecx
|
||||||
|
leal -16843009(ecx),edx
|
||||||
|
movl ecx,eax
|
||||||
|
notl eax
|
||||||
|
andl eax,edx
|
||||||
|
testl $-2139062144,edx
|
||||||
|
je L27
|
||||||
|
|
||||||
|
jmp L9
|
||||||
|
|
||||||
|
#endif /* !__OPTIMIZE_SIZE__ */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user