2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 strlen.
* libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align data so unaligned searches aren't penalized.
* libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
This commit is contained in:
parent
12cf19762d
commit
cae28869c1
@ -1,3 +1,11 @@
|
|||||||
|
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||||
|
|
||||||
|
Optimize the generic and x86 strlen.
|
||||||
|
* libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align
|
||||||
|
data so unaligned searches aren't penalized.
|
||||||
|
* libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]:
|
||||||
|
Word operations are faster than repnz byte searches.
|
||||||
|
|
||||||
2008-05-23 Corinna Vinschen <corinna@vinschen.de>
|
2008-05-23 Corinna Vinschen <corinna@vinschen.de>
|
||||||
|
|
||||||
* libc/include/sys/_default_fcntl.h: Include <sys/time.h> on Cygwin.
|
* libc/include/sys/_default_fcntl.h: Include <sys/time.h> on Cygwin.
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* ====================================================
|
* ====================================================
|
||||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission to use, copy, modify, and distribute this
|
* Permission to use, copy, modify, and distribute this
|
||||||
* software is freely granted, provided that this notice
|
* software is freely granted, provided that this notice
|
||||||
@ -20,12 +20,75 @@ SYM (strlen):
|
|||||||
pushl edi
|
pushl edi
|
||||||
movl 8(ebp),edx
|
movl 8(ebp),edx
|
||||||
|
|
||||||
|
#ifdef __OPTIMIZE_SIZE__
|
||||||
cld
|
cld
|
||||||
movl edx,edi
|
movl edx,edi
|
||||||
movl $4294967295,ecx
|
movl $4294967295,ecx
|
||||||
xor eax,eax
|
xor eax,eax
|
||||||
repnz
|
repnz
|
||||||
scasb
|
scasb
|
||||||
|
#else
|
||||||
|
/* Modern x86 hardware is much faster at double-word
|
||||||
|
manipulation than with bytewise repnz scasb. */
|
||||||
|
|
||||||
|
/* Do byte-wise checks until string is aligned. */
|
||||||
|
movl edx,edi
|
||||||
|
test $3,edi
|
||||||
|
je L5
|
||||||
|
movb (edi),cl
|
||||||
|
incl edi
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L5
|
||||||
|
movb (edi),cl
|
||||||
|
incl edi
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
|
||||||
|
test $3,edi
|
||||||
|
je L5
|
||||||
|
movb (edi),cl
|
||||||
|
incl edi
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
|
||||||
|
L5:
|
||||||
|
subl $4,edi
|
||||||
|
|
||||||
|
/* loop performing 4 byte mask checking for desired 0 byte */
|
||||||
|
.p2align 4,,7
|
||||||
|
L10:
|
||||||
|
addl $4,edi
|
||||||
|
movl (edi),ecx
|
||||||
|
leal -16843009(ecx),eax
|
||||||
|
notl ecx
|
||||||
|
andl ecx,eax
|
||||||
|
testl $-2139062144,eax
|
||||||
|
je L10
|
||||||
|
|
||||||
|
/* Find which of four bytes is 0. */
|
||||||
|
notl ecx
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
shrl $8,ecx
|
||||||
|
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
shrl $8,ecx
|
||||||
|
|
||||||
|
testb cl,cl
|
||||||
|
je L15
|
||||||
|
incl edi
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
L15:
|
||||||
subl edx,edi
|
subl edx,edi
|
||||||
leal -1(edi),eax
|
leal -1(edi),eax
|
||||||
|
|
||||||
|
@ -57,32 +57,32 @@ size_t
|
|||||||
_DEFUN (strlen, (str),
|
_DEFUN (strlen, (str),
|
||||||
_CONST char *str)
|
_CONST char *str)
|
||||||
{
|
{
|
||||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
|
||||||
_CONST char *start = str;
|
_CONST char *start = str;
|
||||||
|
|
||||||
while (*str)
|
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
|
||||||
str++;
|
|
||||||
|
|
||||||
return str - start;
|
|
||||||
#else
|
|
||||||
_CONST char *start = str;
|
|
||||||
unsigned long *aligned_addr;
|
unsigned long *aligned_addr;
|
||||||
|
|
||||||
if (!UNALIGNED (str))
|
/* Align the pointer, so we can search a word at a time. */
|
||||||
|
while (UNALIGNED (str))
|
||||||
{
|
{
|
||||||
/* If the string is word-aligned, we can check for the presence of
|
if (!*str)
|
||||||
a null in each word-sized block. */
|
return str - start;
|
||||||
aligned_addr = (unsigned long*)str;
|
str++;
|
||||||
while (!DETECTNULL (*aligned_addr))
|
|
||||||
aligned_addr++;
|
|
||||||
|
|
||||||
/* Once a null is detected, we check each byte in that block for a
|
|
||||||
precise position of the null. */
|
|
||||||
str = (char*)aligned_addr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the string is word-aligned, we can check for the presence of
|
||||||
|
a null in each word-sized block. */
|
||||||
|
aligned_addr = (unsigned long *)str;
|
||||||
|
while (!DETECTNULL (*aligned_addr))
|
||||||
|
aligned_addr++;
|
||||||
|
|
||||||
|
/* Once a null is detected, we check each byte in that block for a
|
||||||
|
precise position of the null. */
|
||||||
|
str = (char *) aligned_addr;
|
||||||
|
|
||||||
|
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||||
|
|
||||||
while (*str)
|
while (*str)
|
||||||
str++;
|
str++;
|
||||||
return str - start;
|
return str - start;
|
||||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user