diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 4dd39806a..b58faafdd 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,9 @@ +2003-09-29 J"orn Rennecke + + * libc/machine/sh/strncpy.S: New file. + * libc/machine/sh/Makefile.am: Add entry & rule for new file. + * libc/machine/sh/Makefile.in: Regenerate. + 2003-09-11 James E Wilson * MAINTAINERS: Update my e-mail address. diff --git a/newlib/libc/machine/sh/Makefile.am b/newlib/libc/machine/sh/Makefile.am index dd4048bc1..14cd95a00 100644 --- a/newlib/libc/machine/sh/Makefile.am +++ b/newlib/libc/machine/sh/Makefile.am @@ -6,13 +6,18 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a +if SH64 +lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S +else lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S +endif memcpy.o: asm.h memset.o: asm.h setjmp.o: asm.h strcpy.o: asm.h strcmp.o: asm.h +strncpy.o: asm.h ACLOCAL_AMFLAGS = -I ../../.. CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host diff --git a/newlib/libc/machine/sh/Makefile.in b/newlib/libc/machine/sh/Makefile.in index 5fe70808b..db73facd1 100644 --- a/newlib/libc/machine/sh/Makefile.in +++ b/newlib/libc/machine/sh/Makefile.in @@ -1,6 +1,6 @@ -# Makefile.in generated automatically by automake 1.4 from Makefile.am +# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am -# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. +# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
@@ -88,8 +88,8 @@ AUTOMAKE_OPTIONS = cygnus INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) noinst_LIBRARIES = lib.a - -lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S +@SH64_TRUE@lib_a_SOURCES = @SH64_TRUE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S +@SH64_FALSE@lib_a_SOURCES = @SH64_FALSE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S ACLOCAL_AMFLAGS = -I ../../.. CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host @@ -103,7 +103,10 @@ DEFS = @DEFS@ -I. -I$(srcdir) CPPFLAGS = @CPPFLAGS@ LIBS = @LIBS@ lib_a_LIBADD = -lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o strcmp.o +@SH64_TRUE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o strlen.o \ +@SH64_TRUE@strcmp.o strncpy.o +@SH64_FALSE@lib_a_OBJECTS = memcpy.o memset.o setjmp.o strcpy.o \ +@SH64_FALSE@strlen.o strcmp.o CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) @@ -327,6 +330,7 @@ memset.o: asm.h setjmp.o: asm.h strcpy.o: asm.h strcmp.o: asm.h +strncpy.o: asm.h # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/newlib/libc/machine/sh/strncpy.S b/newlib/libc/machine/sh/strncpy.S new file mode 100644 index 000000000..c22ae661f --- /dev/null +++ b/newlib/libc/machine/sh/strncpy.S @@ -0,0 +1,209 @@ +/* Copyright 2003 SuperH Ltd. */ + +#include "asm.h" + +#ifdef __SH5__ +#if __SHMEDIA__ + +#ifdef __LITTLE_ENDIAN__ +#define ZPAD_MASK(src, dst) addi src, -1, dst +#else +#define ZPAD_MASK(src, dst) \ + byterev src, dst; addi dst, -1, dst; byterev dst, dst +#endif + + +/* strncpy, SHmedia version. r2: store address, r3: read address, r4: size; r2 is never written, and every exit branches to the address in r18. We assume that the destination is not in the first 16 bytes of memory. + A typical linker script will put the text section first, and as + this code is longer than 16 bytes, you have to go out of your way + to put data there. 
*/ +ENTRY(strncpy) + pt L_small, tr2 + ldlo.q r3, 0, r0 + shlli r3, 3, r19 + mcmpeq.b r0, r63, r1 + SHHI r1, r19, r7 + add r2, r4, r20 + addi r20, -8, r5 + /* If the size is greater than 8, we know we can read beyond the first + (possibly partial) quadword, and write out a full first and last + (possibly unaligned and/or overlapping) quadword. */ + bge/u r2, r5, tr2 // L_small + pt L_found0, tr0 + addi r2, 8, r22 + bnei/u r7, 0, tr0 // L_found0 + ori r3, -8, r38 + pt L_end_early, tr1 + sub r2, r38, r22 + stlo.q r2, 0, r0 + sthi.q r2, 7, r0 + sub r3, r2, r6 + ldx.q r22, r6, r0 + /* Before each iteration, check that we can store in full the next quad we + are about to fetch. */ + addi r5, -8, r36 + bgtu/u r22, r36, tr1 // L_end_early + pt L_scan0, tr1 +L_scan0: + addi r22, 8, r22 + mcmpeq.b r0, r63, r1 + stlo.q r22, -8, r0 + bnei/u r1, 0, tr0 // L_found0 + sthi.q r22, -1, r0 + ldx.q r22, r6, r0 + bgeu/l r36, r22, tr1 // L_scan0 +L_end: + // At end; we might re-read a few bytes when we fetch the last quad. + // branch mispredict, so load is ready now. + mcmpeq.b r0, r63, r1 + addi r22, 8, r22 + bnei/u r1, 0, tr0 // L_found0 + add r3, r4, r7 + ldlo.q r7, -8, r1 + ldhi.q r7, -1, r7 + ptabs r18, tr0 + stlo.q r22, -8, r0 + or r1, r7, r1 + mcmpeq.b r1, r63, r7 + sthi.q r22, -1, r0 + ZPAD_MASK (r7, r7) + and r1, r7, r1 // mask out non-zero bytes after first zero byte + stlo.q r20, -8, r1 + sthi.q r20, -1, r1 + blink tr0, r63 + +L_end_early: + /* Check if we can store the current quad in full. */ + pt L_end, tr1 + add r3, r4, r7 + bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short. + /* If not, that means we can just proceed to process the last quad. + Two pipeline stalls are unavoidable, as we don't have enough ILP. 
*/ + ldlo.q r7, -8, r1 + ldhi.q r7, -1, r7 + ptabs r18, tr0 + or r1, r7, r1 + mcmpeq.b r1, r63, r7 + ZPAD_MASK (r7, r7) + and r1, r7, r1 // mask out non-zero bytes after first zero byte + stlo.q r20, -8, r1 + sthi.q r20, -1, r1 + blink tr0, r63 + +L_found0: + // r0: string to store, not yet zero-padding normalized. + // r1: result of mcmpeq.b r0, r63, r1. + // r22: store address plus 8. I.e. address where zero padding beyond the + // string in r0 goes. + // r20: store end address. + // r5: store end address minus 8. + pt L_write0_multiquad, tr0 + ZPAD_MASK (r1, r1) + and r0, r1, r0 // mask out non-zero bytes after first zero byte + stlo.q r22, -8, r0 + sthi.q r22, -1, r0 + andi r22, -8, r1 // Check if zeros to write fit in one quad word. + bgtu/l r5, r1, tr0 // L_write0_multiquad + ptabs r18, tr1 + sub r20, r22, r1 + shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is + SHLO r0, r1, r0 // handled correctly. + SHLO r0, r1, r0 + sthi.q r20, -1, r0 + blink tr1, r63 + +L_write0_multiquad: + pt L_write0_loop, tr0 + ptabs r18, tr1 + stlo.q r22, 0, r63 + sthi.q r20, -1, r63 + addi r1, 8, r1 + bgeu/l r5, r1, tr0 // L_write0_loop + blink tr1, r63 + +L_write0_loop: + st.q r1, 0 ,r63 + addi r1, 8, r1 + bgeu/l r5, r1, tr0 // L_write0_loop + blink tr1, r63 + +L_small: + // r0: string to store, not yet zero-padding normalized. + // r1: result of mcmpeq.b r0, r63, r1. + // r7: nonzero indicates relevant zero found in r0. + // r2: store address. + // r3: read address. + // r4: size, max 8. + // r20: store end address. + // r5: store end address minus 8. 
+ pt L_nohi, tr0 + pt L_small_storelong, tr1 + ptabs r18, tr2 + sub r63, r4, r23 + bnei/u r7, 0, tr0 // L_nohi + ori r3, -8, r7 + bge/l r23, r7, tr0 // L_nohi + ldhi.q r3, 7, r1 + or r0, r1, r0 + mcmpeq.b r0, r63, r1 +L_nohi: + ZPAD_MASK (r1, r1) + and r0, r1, r0 + movi 4, r19 + bge/u r4, r19, tr1 // L_small_storelong + + pt L_small_end, tr0 +#ifndef __LITTLE_ENDIAN__ + byterev r0, r0 +#endif + beqi/u r4, 0, tr0 // L_small_end + st.b r2, 0, r0 + beqi/u r4, 1, tr0 // L_small_end + shlri r0, 8, r0 + st.b r2, 1, r0 + beqi/u r4, 2, tr0 // L_small_end + shlri r0, 8, r0 + st.b r2, 2, r0 +L_small_end: + blink tr2, r63 + +L_small_storelong: + shlli r23, 3, r7 + SHHI r0, r7, r1 +#ifdef __LITTLE_ENDIAN__ + shlri r1, 32, r1 +#else + shlri r0, 32, r0 +#endif + stlo.l r2, 0, r0 + sthi.l r2, 3, r0 + stlo.l r20, -4, r1 + sthi.l r20, -1, r1 + blink tr2, r63 + +#else /* SHcompact */ + +/* This code is optimized for size. Instruction selection is SH5 specific. + SH4 should use a different version. One byte is stored per loop iteration; once a zero byte has been stored, the load is skipped and zero bytes are stored until the size is used up. */ +ENTRY(strncpy) + mov #0, r6 + cmp/eq r4, r6 + bt return + mov r2, r5 + add #-1, r5 + add r5, r4 +loop: + bt/s found0 + add #1, r5 + mov.b @r3+, r1 +found0: + cmp/eq r5,r4 + mov.b r1, @r5 + bf/s loop + cmp/eq r1, r6 +return: + rts + nop + +#endif /* SHcompact */ +#endif /* __SH5__ */