* libc/machine/sh/strncpy.S: New file.
* libc/machine/sh/Makefile.am: Add entry & rule for new file. * libc/machine/sh/Makefile.in: Regenerate.
This commit is contained in:
		| @@ -1,3 +1,9 @@ | |||||||
|  | 2003-09-29  J"orn Rennecke <joern.rennecke@superh.com> | ||||||
|  |  | ||||||
|  | 	* libc/machine/sh/strncpy.S: New file. | ||||||
|  | 	* libc/machine/sh/Makefile.am: Add entry & rule for new file. | ||||||
|  | 	* libc/machine/sh/Makefile.in: Regenerate. | ||||||
|  |  | ||||||
| 2003-09-11  James E Wilson  <wilson@specifixinc.com> | 2003-09-11  James E Wilson  <wilson@specifixinc.com> | ||||||
|  |  | ||||||
| 	* MAINTAINERS: Update my e-mail address. | 	* MAINTAINERS: Update my e-mail address. | ||||||
|   | |||||||
| @@ -6,13 +6,18 @@ INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) | |||||||
|  |  | ||||||
| noinst_LIBRARIES = lib.a | noinst_LIBRARIES = lib.a | ||||||
|  |  | ||||||
|  | if SH64 | ||||||
|  | lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S | ||||||
|  | else | ||||||
| lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S | lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S | ||||||
|  | endif | ||||||
|  |  | ||||||
| memcpy.o: asm.h | memcpy.o: asm.h | ||||||
| memset.o: asm.h | memset.o: asm.h | ||||||
| setjmp.o: asm.h | setjmp.o: asm.h | ||||||
| strcpy.o: asm.h | strcpy.o: asm.h | ||||||
| strcmp.o: asm.h | strcmp.o: asm.h | ||||||
|  | strncpy.o: asm.h | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I ../../.. | ACLOCAL_AMFLAGS = -I ../../.. | ||||||
| CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host | CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host | ||||||
|   | |||||||
| @@ -1,6 +1,6 @@ | |||||||
| # Makefile.in generated automatically by automake 1.4 from Makefile.am | # Makefile.in generated automatically by automake 1.4-p5 from Makefile.am | ||||||
|  |  | ||||||
| # Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc. | # Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. | ||||||
| # This Makefile.in is free software; the Free Software Foundation | # This Makefile.in is free software; the Free Software Foundation | ||||||
| # gives unlimited permission to copy and/or distribute it, | # gives unlimited permission to copy and/or distribute it, | ||||||
| # with or without modifications, as long as this notice is preserved. | # with or without modifications, as long as this notice is preserved. | ||||||
| @@ -88,8 +88,8 @@ AUTOMAKE_OPTIONS = cygnus | |||||||
| INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) | INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS) | ||||||
|  |  | ||||||
| noinst_LIBRARIES = lib.a | noinst_LIBRARIES = lib.a | ||||||
|  | @SH64_TRUE@lib_a_SOURCES = @SH64_TRUE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S strncpy.S | ||||||
| lib_a_SOURCES = memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S | @SH64_FALSE@lib_a_SOURCES = @SH64_FALSE@memcpy.S memset.S setjmp.S strcpy.S strlen.S strcmp.S | ||||||
|  |  | ||||||
| ACLOCAL_AMFLAGS = -I ../../.. | ACLOCAL_AMFLAGS = -I ../../.. | ||||||
| CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host | CONFIG_STATUS_DEPENDENCIES = $(newlib_basedir)/configure.host | ||||||
| @@ -103,7 +103,10 @@ DEFS = @DEFS@ -I. -I$(srcdir) | |||||||
| CPPFLAGS = @CPPFLAGS@ | CPPFLAGS = @CPPFLAGS@ | ||||||
| LIBS = @LIBS@ | LIBS = @LIBS@ | ||||||
| lib_a_LIBADD =  | lib_a_LIBADD =  | ||||||
| lib_a_OBJECTS =  memcpy.o memset.o setjmp.o strcpy.o strlen.o strcmp.o | @SH64_TRUE@lib_a_OBJECTS =  memcpy.o memset.o setjmp.o strcpy.o strlen.o \ | ||||||
|  | @SH64_TRUE@strcmp.o strncpy.o | ||||||
|  | @SH64_FALSE@lib_a_OBJECTS =  memcpy.o memset.o setjmp.o strcpy.o \ | ||||||
|  | @SH64_FALSE@strlen.o strcmp.o | ||||||
| CFLAGS = @CFLAGS@ | CFLAGS = @CFLAGS@ | ||||||
| COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) | COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) | ||||||
| CCLD = $(CC) | CCLD = $(CC) | ||||||
| @@ -327,6 +330,7 @@ memset.o: asm.h | |||||||
| setjmp.o: asm.h | setjmp.o: asm.h | ||||||
| strcpy.o: asm.h | strcpy.o: asm.h | ||||||
| strcmp.o: asm.h | strcmp.o: asm.h | ||||||
|  | strncpy.o: asm.h | ||||||
|  |  | ||||||
| # Tell versions [3.59,3.63) of GNU make to not export all variables. | # Tell versions [3.59,3.63) of GNU make to not export all variables. | ||||||
| # Otherwise a system limit (for SysV at least) may be exceeded. | # Otherwise a system limit (for SysV at least) may be exceeded. | ||||||
|   | |||||||
							
								
								
									
										209
									
								
								newlib/libc/machine/sh/strncpy.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										209
									
								
								newlib/libc/machine/sh/strncpy.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,209 @@ | |||||||
|  | /* Copyright 2003 SuperH Ltd.  */ | ||||||
|  |  | ||||||
|  | #include "asm.h" | ||||||
|  |  | ||||||
|  | #ifdef __SH5__ | ||||||
|  | #if __SHMEDIA__ | ||||||
|  |  | ||||||
|  | #ifdef __LITTLE_ENDIAN__ /* ZPAD_MASK (src, dst): every use in this file | ||||||
|  |    passes in src the result of an mcmpeq.b of a data quadword against r63, | ||||||
|  |    and then ANDs dst with that data quadword to "mask out non-zero bytes | ||||||
|  |    after first zero byte".  Subtracting 1 turns everything below the first | ||||||
|  |    flagged byte into ones (bytes kept) and leaves the flags of later bytes | ||||||
|  |    untouched; if src is 0 (no zero byte seen) the result is all ones, so | ||||||
|  |    the data is kept unchanged.  */ | ||||||
|  | #define ZPAD_MASK(src, dst) addi src, -1, dst | ||||||
|  | #else /* Big endian: byte-reverse, subtract 1, and byte-reverse back.  */ | ||||||
|  | #define ZPAD_MASK(src, dst) \ | ||||||
|  |  byterev src, dst; addi dst, -1, dst; byterev dst, dst | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* strncpy for SHmedia. | ||||||
|  |  | ||||||
|  |    Register roles, as listed in the L_found0 / L_small comments below: | ||||||
|  |      r2:  store (destination) address; never modified by this code. | ||||||
|  |      r3:  read (source) address. | ||||||
|  |      r4:  size in bytes. | ||||||
|  |      r20: store end address (r2 + r4). | ||||||
|  |      r5:  store end address minus 8. | ||||||
|  |      r18: loaded into a target register with ptabs and branched to with | ||||||
|  |           blink on every exit path (presumably the return address). | ||||||
|  |      r63: used as the zero operand for the compares and the zero stores. | ||||||
|  |  | ||||||
|  |    Sizes of at most 8 bytes are handled at L_small.  Larger sizes are | ||||||
|  |    copied a quadword at a time with stlo.q / sthi.q pairs.  As soon as a | ||||||
|  |    fetched quadword contains a zero byte, control goes to L_found0, which | ||||||
|  |    stores that quadword with everything after the first zero byte cleared | ||||||
|  |    and then zero-fills the remainder of the destination. | ||||||
|  |  | ||||||
|  |    We assume that the destination is not in the first 16 bytes of memory. | ||||||
|  |    A typical linker script will put the text section first, and as | ||||||
|  |    this code is longer than 16 bytes, you have to go out of your way | ||||||
|  |    to put data there.  */ | ||||||
|  | ENTRY(strncpy) | ||||||
|  |  pt L_small, tr2 | ||||||
|  |  ldlo.q r3, 0, r0 /* r0 = first (possibly partial) source quadword */ | ||||||
|  |  shlli r3, 3, r19 /* r19 = 8 * r3, the shift count for SHHI below */ | ||||||
|  |  mcmpeq.b r0, r63, r1 /* r1 = per-byte compare of r0 against zero */ | ||||||
|  |  SHHI r1, r19, r7 /* r7 != 0: relevant zero found in r0.  SHHI is not defined in this file; presumably it comes from asm.h */ | ||||||
|  |  add r2, r4, r20 /* r20 = store end address */ | ||||||
|  |  addi r20, -8, r5 /* r5 = store end address minus 8 */ | ||||||
|  |  /* If the size is greater than 8, we know we can read beyond the first | ||||||
|  |     (possibly partial) quadword, and write out a full first and last | ||||||
|  |     (possibly unaligned and/or overlapping) quadword.  */ | ||||||
|  |  bge/u r2, r5, tr2 // L_small | ||||||
|  |  pt L_found0, tr0 | ||||||
|  |  addi r2, 8, r22 /* r22 = store address plus 8, as L_found0 expects */ | ||||||
|  |  bnei/u r7, 0, tr0  // L_found0 | ||||||
|  |  ori r3, -8, r38 /* r38 = (r3 & 7) - 8, i.e. minus the byte count up to the next 8-byte boundary above r3 */ | ||||||
|  |  pt L_end_early, tr1 | ||||||
|  |  sub r2, r38, r22 /* r22 = store address that corresponds to that source boundary */ | ||||||
|  |  stlo.q r2, 0, r0 | ||||||
|  |  sthi.q r2, 7, r0 /* with the stlo.q above: write r0 as a full, possibly unaligned quad at r2 */ | ||||||
|  |  sub r3, r2, r6 /* r6 = source minus destination, so r22 + r6 is the matching read address */ | ||||||
|  |  ldx.q r22, r6, r0 /* fetch the next source quadword */ | ||||||
|  |  /* Before each iteration, check that we can store in full the next quad we | ||||||
|  |     are about to fetch.  */ | ||||||
|  |  addi r5, -8, r36 /* r36 = store end address minus 16 */ | ||||||
|  |  bgtu/u r22, r36, tr1 // L_end_early | ||||||
|  |  pt L_scan0, tr1 | ||||||
|  | L_scan0: | ||||||
|  |  addi r22, 8, r22 | ||||||
|  |  mcmpeq.b r0, r63, r1 | ||||||
|  |  stlo.q r22, -8, r0 | ||||||
|  |  bnei/u r1, 0, tr0   // L_found0 | ||||||
|  |  sthi.q r22, -1, r0 | ||||||
|  |  ldx.q r22, r6, r0 | ||||||
|  |  bgeu/l r36, r22, tr1 // L_scan0 | ||||||
|  | L_end: | ||||||
|  |  // At end; we might re-read a few bytes when we fetch the last quad. | ||||||
|  |  // branch mispredict, so load is ready now. | ||||||
|  |  mcmpeq.b r0, r63, r1 | ||||||
|  |  addi r22, 8, r22 | ||||||
|  |  bnei/u r1, 0, tr0   // L_found0 | ||||||
|  |  add r3, r4, r7 /* r7 = read end address (r3 + r4) */ | ||||||
|  |  ldlo.q r7, -8, r1 | ||||||
|  |  ldhi.q r7, -1, r7 /* with the ldlo.q above: the two parts of the last 8 source bytes */ | ||||||
|  |  ptabs r18, tr0 /* tr0 = exit target taken from r18 */ | ||||||
|  |  stlo.q r22, -8, r0 | ||||||
|  |  or r1, r7, r1 /* r1 = last source quadword, both parts combined */ | ||||||
|  |  mcmpeq.b r1, r63, r7 | ||||||
|  |  sthi.q r22, -1, r0 | ||||||
|  |  ZPAD_MASK (r7, r7) | ||||||
|  |  and r1, r7, r1 // mask out non-zero bytes after first zero byte | ||||||
|  |  stlo.q r20, -8, r1 | ||||||
|  |  sthi.q r20, -1, r1 /* with the stlo.q above: store the last quad so that it ends at the store end address */ | ||||||
|  |  blink tr0, r63 | ||||||
|  |  | ||||||
|  | L_end_early: | ||||||
|  |  /* Check if we can store the current quad in full.  */ | ||||||
|  |  pt L_end, tr1 | ||||||
|  |  add r3, r4, r7 /* r7 = read end address (r3 + r4) */ | ||||||
|  |  bgtu/u r5, r22, tr1 // L_end // Not really unlikely, but gap is short. | ||||||
|  |  /* If not, that means we can just proceed to process the last quad. | ||||||
|  |     Two pipeline stalls are unavoidable, as we don't have enough ILP.  */ | ||||||
|  |  ldlo.q r7, -8, r1 | ||||||
|  |  ldhi.q r7, -1, r7 | ||||||
|  |  ptabs r18, tr0 /* tr0 = exit target taken from r18 */ | ||||||
|  |  or r1, r7, r1 | ||||||
|  |  mcmpeq.b r1, r63, r7 | ||||||
|  |  ZPAD_MASK (r7, r7) | ||||||
|  |  and r1, r7, r1 // mask out non-zero bytes after first zero byte | ||||||
|  |  stlo.q r20, -8, r1 | ||||||
|  |  sthi.q r20, -1, r1 | ||||||
|  |  blink tr0, r63 | ||||||
|  |  | ||||||
|  | L_found0: | ||||||
|  |  // r0: string to store, not yet zero-padding normalized. | ||||||
|  |  // r1: result of mcmpeq.b r0, r63, r1. | ||||||
|  |  // r22: store address plus 8.  I.e. address where zero padding beyond the | ||||||
|  |  //      string in r0 goes. | ||||||
|  |  // r20: store end address. | ||||||
|  |  // r5: store end address minus 8. | ||||||
|  |  pt L_write0_multiquad, tr0 | ||||||
|  |  ZPAD_MASK (r1, r1) | ||||||
|  |  and r0, r1, r0 // mask out non-zero bytes after first zero byte | ||||||
|  |  stlo.q r22, -8, r0 | ||||||
|  |  sthi.q r22, -1, r0 | ||||||
|  |  andi r22, -8, r1 // Check if zeros to write fit in one quad word. | ||||||
|  |  bgtu/l r5, r1, tr0 // L_write0_multiquad | ||||||
|  |  ptabs r18, tr1 /* tr1 = exit target taken from r18 */ | ||||||
|  |  sub r20, r22, r1 /* r1 = number of bytes still to be zeroed */ | ||||||
|  |  shlli r1, 2, r1 // Do shift in two steps so that 64 bit case is | ||||||
|  |  SHLO r0, r1, r0 // handled correctly. | ||||||
|  |  SHLO r0, r1, r0 | ||||||
|  |  sthi.q r20, -1, r0 | ||||||
|  |  blink tr1, r63 | ||||||
|  |  | ||||||
|  | L_write0_multiquad: | ||||||
|  |  pt L_write0_loop, tr0 | ||||||
|  |  ptabs r18, tr1 /* tr1 = exit target taken from r18 */ | ||||||
|  |  stlo.q r22, 0, r63 /* zero from r22 up to the next 8-byte boundary */ | ||||||
|  |  sthi.q r20, -1, r63 /* zero the final partial quad ending at the store end address */ | ||||||
|  |  addi r1, 8, r1 /* r1 was r22 & -8; step to the first whole quad to zero */ | ||||||
|  |  bgeu/l r5, r1, tr0 // L_write0_loop | ||||||
|  |  blink tr1, r63 | ||||||
|  |  | ||||||
|  | L_write0_loop: | ||||||
|  |  st.q r1, 0 ,r63 | ||||||
|  |  addi r1, 8, r1 | ||||||
|  |  bgeu/l r5, r1, tr0 // L_write0_loop | ||||||
|  |  blink tr1, r63 | ||||||
|  |  | ||||||
|  | L_small: | ||||||
|  |  // r0: string to store, not yet zero-padding normalized. | ||||||
|  |  // r1: result of mcmpeq.b r0, r63, r1. | ||||||
|  |  // r7: nonzero indicates relevant zero found in r0. | ||||||
|  |  // r2: store address. | ||||||
|  |  // r3: read address. | ||||||
|  |  // r4: size, max 8 | ||||||
|  |  // r20: store end address. | ||||||
|  |  // r5: store end address minus 8. | ||||||
|  |  pt L_nohi, tr0 | ||||||
|  |  pt L_small_storelong, tr1 | ||||||
|  |  ptabs r18, tr2 /* tr2 = exit target taken from r18 */ | ||||||
|  |  sub r63, r4, r23 /* r23 = -size */ | ||||||
|  |  bnei/u r7, 0, tr0  // L_nohi | ||||||
|  |  ori r3, -8, r7 /* r7 = (r3 & 7) - 8, minus the bytes available in the first source quad */ | ||||||
|  |  bge/l r23, r7, tr0 // L_nohi | ||||||
|  |  ldhi.q r3, 7, r1 /* size reaches past the first quad: fetch the high part too */ | ||||||
|  |  or r0, r1, r0 | ||||||
|  |  mcmpeq.b r0, r63, r1 | ||||||
|  | L_nohi: | ||||||
|  |  ZPAD_MASK (r1, r1) | ||||||
|  |  and r0, r1, r0 | ||||||
|  |  movi 4, r19 | ||||||
|  |  bge/u r4, r19, tr1 // L_small_storelong | ||||||
|  |  | ||||||
|  |  pt L_small_end, tr0 | ||||||
|  | #ifndef __LITTLE_ENDIAN__ | ||||||
|  |  byterev r0, r0 | ||||||
|  | #endif | ||||||
|  |  beqi/u r4, 0, tr0 // L_small_end | ||||||
|  |  st.b r2, 0, r0 | ||||||
|  |  beqi/u r4, 1, tr0 // L_small_end | ||||||
|  |  shlri r0, 8, r0 | ||||||
|  |  st.b r2, 1, r0 | ||||||
|  |  beqi/u r4, 2, tr0 // L_small_end | ||||||
|  |  shlri r0, 8, r0 | ||||||
|  |  st.b r2, 2, r0 | ||||||
|  | L_small_end: | ||||||
|  |  blink tr2, r63 | ||||||
|  |  | ||||||
|  | L_small_storelong: | ||||||
|  |  shlli r23, 3, r7 /* r7 = -8 * size, shift count for SHHI */ | ||||||
|  |  SHHI r0, r7, r1 | ||||||
|  | #ifdef __LITTLE_ENDIAN__ | ||||||
|  |  shlri r1, 32, r1 | ||||||
|  | #else | ||||||
|  |  shlri r0, 32, r0 | ||||||
|  | #endif | ||||||
|  |  stlo.l r2, 0, r0 | ||||||
|  |  sthi.l r2, 3, r0 /* with the stlo.l above: longword at the store address */ | ||||||
|  |  stlo.l r20, -4, r1 | ||||||
|  |  sthi.l r20, -1, r1 /* with the stlo.l above: longword ending at the store end address */ | ||||||
|  |  blink tr2, r63 | ||||||
|  |  | ||||||
|  | #else /* SHcompact */ | ||||||
|  |  | ||||||
|  | /* This code is optimized for size.  Instruction selection is SH5 specific. | ||||||
|  |    SH4 should use a different version. | ||||||
|  |  | ||||||
|  |    Register use, as read from the code below: | ||||||
|  |      r2: destination; copied to r5 and never modified. | ||||||
|  |      r3: source pointer, post-incremented by each byte load. | ||||||
|  |      r4: byte count on entry; then the address of the last byte to store. | ||||||
|  |      r5: current store address (starts at r2 - 1, pre-incremented). | ||||||
|  |      r1: byte being stored. | ||||||
|  |      r6: constant 0. | ||||||
|  |  | ||||||
|  |    The T bit at the top of the loop says whether the byte stored last was | ||||||
|  |    zero; it is set in the delay slot of the loop branch.  Once it is set, | ||||||
|  |    the load is skipped, so r1 stays zero and the remaining destination | ||||||
|  |    bytes are zero-filled.  A count of 0 returns without storing.  */ | ||||||
|  | ENTRY(strncpy) | ||||||
|  |  mov #0, r6 | ||||||
|  |  cmp/eq r4, r6 | ||||||
|  |  bt return /* count is 0: nothing to do */ | ||||||
|  |  mov r2, r5 | ||||||
|  |  add #-1, r5 | ||||||
|  |  add r5, r4 /* r4 = r2 + count - 1, address of the last byte to store */ | ||||||
|  | loop: | ||||||
|  |  bt/s found0 /* T set: a zero byte was already stored, skip the load */ | ||||||
|  |  add #1, r5 /* delay slot: advance the store address */ | ||||||
|  |  mov.b @r3+, r1 | ||||||
|  | found0: | ||||||
|  |  cmp/eq r5,r4 /* T = this is the last byte to store */ | ||||||
|  |  mov.b r1, @r5 | ||||||
|  |  bf/s loop /* not the last byte yet: continue */ | ||||||
|  |  cmp/eq r1, r6 /* delay slot: T = stored byte was zero */ | ||||||
|  | return: | ||||||
|  |  rts | ||||||
|  |  nop | ||||||
|  |   | ||||||
|  | #endif /* SHcompact */ | ||||||
|  | #endif /* __SH5__ */ | ||||||
		Reference in New Issue
	
	Block a user