From 87375c75b377633e93e1555b5f05c43f8d767a30 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 11 Jul 2014 09:10:50 +0000 Subject: [PATCH] [aarch64] Add memchr. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2014-07-11 K�vin Petit * libc/machine/aarch64/memchr.S: New file. * libc/machine/aarch64/memchr-stub.c: New file. * libc/machine/aarch64/Makefile.am: Add the new files. * libc/machine/aarch64/Makefile.in: Regenerated. --- newlib/ChangeLog | 7 + newlib/libc/machine/aarch64/Makefile.am | 2 + newlib/libc/machine/aarch64/Makefile.in | 25 +++- newlib/libc/machine/aarch64/memchr-stub.c | 31 ++++ newlib/libc/machine/aarch64/memchr.S | 170 ++++++++++++++++++++++ 5 files changed, 229 insertions(+), 6 deletions(-) create mode 100644 newlib/libc/machine/aarch64/memchr-stub.c create mode 100644 newlib/libc/machine/aarch64/memchr.S diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 507e7e0db..059ef57d5 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,10 @@ +2014-07-11 Kévin Petit + + * libc/machine/aarch64/memchr.S: New file. + * libc/machine/aarch64/memchr-stub.c: New file. + * libc/machine/aarch64/Makefile.am: Add the new files. + * libc/machine/aarch64/Makefile.in: Regenerated. + 2014-07-07 Pavel Pisa Richard Earnshaw diff --git a/newlib/libc/machine/aarch64/Makefile.am b/newlib/libc/machine/aarch64/Makefile.am index 43946bac3..b9fa7cb15 100644 --- a/newlib/libc/machine/aarch64/Makefile.am +++ b/newlib/libc/machine/aarch64/Makefile.am @@ -9,6 +9,8 @@ AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a lib_a_SOURCES = +lib_a_SOURCES += memchr-stub.c +lib_a_SOURCES += memchr.S lib_a_SOURCES += memcmp-stub.c lib_a_SOURCES += memcmp.S lib_a_SOURCES += memcpy-stub.c diff --git a/newlib/libc/machine/aarch64/Makefile.in b/newlib/libc/machine/aarch64/Makefile.in index d1a15f196..fd376957c 100644 --- a/newlib/libc/machine/aarch64/Makefile.in +++ b/newlib/libc/machine/aarch64/Makefile.in @@ -69,7 +69,8 @@ LIBRARIES = $(noinst_LIBRARIES) ARFLAGS = cru lib_a_AR = $(AR) $(ARFLAGS) lib_a_LIBADD = -am_lib_a_OBJECTS = lib_a-memcmp-stub.$(OBJEXT) lib_a-memcmp.$(OBJEXT) \ +am_lib_a_OBJECTS = lib_a-memchr-stub.$(OBJEXT) lib_a-memchr.$(OBJEXT) \ + lib_a-memcmp-stub.$(OBJEXT) lib_a-memcmp.$(OBJEXT) \ lib_a-memcpy-stub.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \ lib_a-memmove-stub.$(OBJEXT) lib_a-memmove.$(OBJEXT) \ lib_a-memset-stub.$(OBJEXT) lib_a-memset.$(OBJEXT) \ @@ -205,11 +206,11 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) AM_CCASFLAGS = $(INCLUDES) noinst_LIBRARIES = lib.a -lib_a_SOURCES = memcmp-stub.c memcmp.S memcpy-stub.c memcpy.S \ - memmove-stub.c memmove.S memset-stub.c memset.S setjmp.S \ - strchr-stub.c strchr.S strchrnul-stub.c strchrnul.S \ - strcmp-stub.c strcmp.S strlen-stub.c strlen.S strncmp-stub.c \ - strncmp.S strnlen-stub.c strnlen.S +lib_a_SOURCES = memchr-stub.c memchr.S memcmp-stub.c memcmp.S \ + memcpy-stub.c memcpy.S memmove-stub.c memmove.S memset-stub.c \ + memset.S setjmp.S strchr-stub.c strchr.S strchrnul-stub.c \ + strchrnul.S strcmp-stub.c strcmp.S strlen-stub.c strlen.S \ + strncmp-stub.c strncmp.S strnlen-stub.c strnlen.S lib_a_CCASFLAGS = $(AM_CCASFLAGS) lib_a_CFLAGS = $(AM_CFLAGS) ACLOCAL_AMFLAGS = -I ../../.. -I ../../../.. @@ -272,6 +273,12 @@ distclean-compile: .S.obj: $(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` +lib_a-memchr.o: memchr.S + $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memchr.o `test -f 'memchr.S' || echo '$(srcdir)/'`memchr.S + +lib_a-memchr.obj: memchr.S + $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memchr.obj `if test -f 'memchr.S'; then $(CYGPATH_W) 'memchr.S'; else $(CYGPATH_W) '$(srcdir)/memchr.S'; fi` + lib_a-memcmp.o: memcmp.S $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-memcmp.o `test -f 'memcmp.S' || echo '$(srcdir)/'`memcmp.S @@ -344,6 +351,12 @@ lib_a-strnlen.obj: strnlen.S .c.obj: $(COMPILE) -c `$(CYGPATH_W) '$<'` +lib_a-memchr-stub.o: memchr-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memchr-stub.o `test -f 'memchr-stub.c' || echo '$(srcdir)/'`memchr-stub.c + +lib_a-memchr-stub.obj: memchr-stub.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memchr-stub.obj `if test -f 'memchr-stub.c'; then $(CYGPATH_W) 'memchr-stub.c'; else $(CYGPATH_W) '$(srcdir)/memchr-stub.c'; fi` + lib_a-memcmp-stub.o: memcmp-stub.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-memcmp-stub.o `test -f 'memcmp-stub.c' || echo '$(srcdir)/'`memcmp-stub.c diff --git a/newlib/libc/machine/aarch64/memchr-stub.c b/newlib/libc/machine/aarch64/memchr-stub.c new file mode 100644 index 000000000..dc550aa9d --- /dev/null +++ b/newlib/libc/machine/aarch64/memchr-stub.c @@ -0,0 +1,31 @@ +/* Copyright (c) 2013, Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) +# include "../../string/memchr.c" +#else +/* See memchr.S */ +#endif diff --git a/newlib/libc/machine/aarch64/memchr.S b/newlib/libc/machine/aarch64/memchr.S new file mode 100644 index 000000000..eb59a5aad --- /dev/null +++ b/newlib/libc/machine/aarch64/memchr.S @@ -0,0 +1,170 @@ +/* + * memchr - find a character in a memory zone + * + * Copyright (c) 2014, ARM Limited + * All rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the company nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) +/* See memchr-stub.c */ +#else +/* Assumptions: + * + * ARMv8-a, AArch64 + * Neon Available. + */ + +/* Arguments and results. */ +#define srcin x0 +#define chrin w1 +#define cntin x2 + +#define result x0 + +#define src x3 +#define tmp x4 +#define wtmp2 w5 +#define synd x6 +#define soff x9 +#define cntrem x10 + +#define vrepchr v0 +#define vdata1 v1 +#define vdata2 v2 +#define vhas_chr1 v3 +#define vhas_chr2 v4 +#define vrepmask v5 +#define vend v6 + +/* + * Core algorithm: + * + * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits + * per byte. For each tuple, bit 0 is set if the relevant byte matched the + * requested character and bit 1 is not used (faster than using a 32bit + * syndrome). Since the bits in the syndrome reflect exactly the order in which + * things occur in the original string, counting trailing zeros allows to + * identify exactly which byte has matched. + */ + + .macro def_fn f p2align=0 + .text + .p2align \p2align + .global \f + .type \f, %function +\f: + .endm + +def_fn memchr + /* + * Magic constant 0x40100401 allows us to identify which lane matches + * the requested byte. + */ + mov wtmp2, #0x0401 + movk wtmp2, #0x4010, lsl #16 + dup vrepchr.16b, chrin + /* Work with aligned 32-byte chunks */ + bic src, srcin, #31 + dup vrepmask.4s, wtmp2 + ands soff, srcin, #31 + and cntrem, cntin, #31 + b.eq .Lloop + + /* + * Input string is not 32-byte aligned. We calculate the syndrome + * value for the aligned 32 bytes block containing the first bytes + * and mask the irrelevant part. + */ + + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + sub tmp, soff, #32 + adds cntin, cntin, tmp + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ + addp vend.16b, vend.16b, vend.16b /* 128->64 */ + mov synd, vend.2d[0] + /* Clear the soff*2 lower bits */ + lsl tmp, soff, #1 + lsr synd, synd, tmp + lsl synd, synd, tmp + /* The first block can also be the last */ + b.ls .Lmasklast + /* Have we found something already? */ + cbnz synd, .Ltail + +.Lloop: + ld1 {vdata1.16b, vdata2.16b}, [src], #32 + subs cntin, cntin, #32 + cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b + cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b + /* If we're out of data we finish regardless of the result */ + b.ls .Lend + /* Use a fast check for the termination condition */ + orr vend.16b, vhas_chr1.16b, vhas_chr2.16b + addp vend.2d, vend.2d, vend.2d + mov synd, vend.2d[0] + /* We're not out of data, loop if we haven't found the character */ + cbz synd, .Lloop + +.Lend: + /* Termination condition found, let's calculate the syndrome value */ + and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b + and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b + addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */ + addp vend.16b, vend.16b, vend.16b /* 128->64 */ + mov synd, vend.2d[0] + /* Only do the clear for the last possible block */ + b.hi .Ltail + +.Lmasklast: + /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */ + add tmp, cntrem, soff + and tmp, tmp, #31 + sub tmp, tmp, #32 + neg tmp, tmp, lsl #1 + lsl synd, synd, tmp + lsr synd, synd, tmp + +.Ltail: + /* Count the trailing zeros using bit reversing */ + rbit synd, synd + /* Compensate the last post-increment */ + sub src, src, #32 + /* Check that we have found a character */ + cmp synd, #0 + /* And count the leading zeros */ + clz synd, synd + /* Compute the potential result */ + add result, src, synd, lsr #1 + /* Select result or NULL */ + csel result, xzr, result, eq + ret + + .size memchr, . - memchr +#endif