2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 strlen.

* libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align data so unaligned searches aren't penalized.
* libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
This commit is contained in:
parent
12cf19762d
commit
cae28869c1
|
@@ -1,3 +1,11 @@
|
|||
2008-05-26 Eric Blake <ebb9@byu.net>
|
||||
|
||||
Optimize the generic and x86 strlen.
|
||||
* libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align
|
||||
data so unaligned searches aren't penalized.
|
||||
* libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]:
|
||||
Word operations are faster than repnz byte searches.
|
||||
|
||||
2008-05-23 Corinna Vinschen <corinna@vinschen.de>
|
||||
|
||||
* libc/include/sys/_default_fcntl.h: Include <sys/time.h> on Cygwin.
|
||||
|
|
|
@@ -1,6 +1,6 @@
|
|||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
|
||||
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
|
@@ -20,12 +20,75 @@ SYM (strlen):
|
|||
pushl edi
|
||||
movl 8(ebp),edx
|
||||
|
||||
#ifdef __OPTIMIZE_SIZE__
|
||||
cld
|
||||
movl edx,edi
|
||||
movl $4294967295,ecx
|
||||
xor eax,eax
|
||||
repnz
|
||||
scasb
|
||||
#else
|
||||
/* Modern x86 hardware is much faster at double-word
|
||||
manipulation than with bytewise repnz scasb. */
|
||||
|
||||
/* Do byte-wise checks until string is aligned. */
|
||||
movl edx,edi
|
||||
test $3,edi
|
||||
je L5
|
||||
movb (edi),cl
|
||||
incl edi
|
||||
testb cl,cl
|
||||
je L15
|
||||
|
||||
test $3,edi
|
||||
je L5
|
||||
movb (edi),cl
|
||||
incl edi
|
||||
testb cl,cl
|
||||
je L15
|
||||
|
||||
test $3,edi
|
||||
je L5
|
||||
movb (edi),cl
|
||||
incl edi
|
||||
testb cl,cl
|
||||
je L15
|
||||
|
||||
L5:
|
||||
subl $4,edi
|
||||
|
||||
/* loop performing 4 byte mask checking for desired 0 byte */
|
||||
.p2align 4,,7
|
||||
L10:
|
||||
addl $4,edi
|
||||
movl (edi),ecx
|
||||
leal -16843009(ecx),eax
|
||||
notl ecx
|
||||
andl ecx,eax
|
||||
testl $-2139062144,eax
|
||||
je L10
|
||||
|
||||
/* Find which of four bytes is 0. */
|
||||
notl ecx
|
||||
incl edi
|
||||
|
||||
testb cl,cl
|
||||
je L15
|
||||
incl edi
|
||||
shrl $8,ecx
|
||||
|
||||
testb cl,cl
|
||||
je L15
|
||||
incl edi
|
||||
shrl $8,ecx
|
||||
|
||||
testb cl,cl
|
||||
je L15
|
||||
incl edi
|
||||
|
||||
#endif
|
||||
|
||||
L15:
|
||||
subl edx,edi
|
||||
leal -1(edi),eax
|
||||
|
||||
|
|
|
@@ -1,7 +1,7 @@
|
|||
/*
|
||||
/*
|
||||
FUNCTION
|
||||
<<strlen>>---character string length
|
||||
|
||||
|
||||
INDEX
|
||||
strlen
|
||||
|
||||
|
@@ -57,32 +57,32 @@ size_t
|
|||
_DEFUN (strlen, (str),
|
||||
_CONST char *str)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
_CONST char *start = str;
|
||||
|
||||
while (*str)
|
||||
str++;
|
||||
|
||||
return str - start;
|
||||
#else
|
||||
_CONST char *start = str;
|
||||
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
|
||||
unsigned long *aligned_addr;
|
||||
|
||||
if (!UNALIGNED (str))
|
||||
/* Align the pointer, so we can search a word at a time. */
|
||||
while (UNALIGNED (str))
|
||||
{
|
||||
/* If the string is word-aligned, we can check for the presence of
|
||||
a null in each word-sized block. */
|
||||
aligned_addr = (unsigned long*)str;
|
||||
while (!DETECTNULL (*aligned_addr))
|
||||
aligned_addr++;
|
||||
|
||||
/* Once a null is detected, we check each byte in that block for a
|
||||
precise position of the null. */
|
||||
str = (char*)aligned_addr;
|
||||
if (!*str)
|
||||
return str - start;
|
||||
str++;
|
||||
}
|
||||
|
||||
|
||||
/* If the string is word-aligned, we can check for the presence of
|
||||
a null in each word-sized block. */
|
||||
aligned_addr = (unsigned long *)str;
|
||||
while (!DETECTNULL (*aligned_addr))
|
||||
aligned_addr++;
|
||||
|
||||
/* Once a null is detected, we check each byte in that block for a
|
||||
precise position of the null. */
|
||||
str = (char *) aligned_addr;
|
||||
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
|
||||
while (*str)
|
||||
str++;
|
||||
return str - start;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue