same as in commitid 1005B6CF36E3932B560, plus assorted cleanup

2018-08-10 02:53:39 +00:00
parent d5ce724471
commit 6cea111ef1
9 changed files with 36 additions and 35 deletions
--- a/check.t
+++ b/check.t
@@ -1,4 +1,4 @@
-# $MirOS: src/bin/mksh/check.t,v 1.807 2018/07/15 17:22:15 tg Exp $
+# $MirOS: src/bin/mksh/check.t,v 1.808 2018/08/10 02:53:31 tg Exp $
 # -*- mode: sh -*-
 #-
 # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@@ -2487,7 +2487,7 @@ expected-stdout:
 name: glob-range-3
 description:
 	Check that globbing matches the right things...
-# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
+# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
 # breaks on Cygwin 1.7 (files are now UTF-16 or something)
 # breaks on QNX 6.4.1 (says RT)
 category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
@@ -8512,7 +8512,7 @@ expected-stdout:
 ---
 name: typeset-padding-3
 description:
-	Check for a regression in which Unicode wasn’t left-padded right
+	Check for a regression in which UTF-8 wasn’t left-padded right
 stdin:
 	set -U
 	nl=$'\n'
@@ -8535,7 +8535,7 @@ description:
 	Check that the UTF-8 Byte Order Mark is ignored as the first
 	multibyte character of the shell input (with -c, from standard
 	input, as file, or as eval argument), but nowhere else
-# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
+# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
 category: !os:darwin,!shell:ebcdic-yes
 stdin:
 	mkdir foo
@@ -10414,7 +10414,7 @@ expected-stdout:
 	000000C0  9F A0 A1 A2 A3 A4 A5 A6 - A7 A8 A9 AA AB AC AD AE  |................|
 	000000D0  AF B0 B1 B2 B3 B4 B5 B6 - B7 B8 B9 BA BB BC BD BE  |................|
 	000000E0  BF C0 C1 C2 C3 C4 C5 C6 - C7 C8 C9 CA CB CC CD CE  |................|
-	000000F0  CF D0 D1 D2 D3 D4 D5 D6 - D7 D8 D9 DA DB DC DD DE  |................|
+	000000F0  CF D0 D1 D2 D3 D4 D5 D6 - D7 D8 D9 DA DB DC DD DE  |................|
 	00000100  DF E0 E1 E2 E3 E4 E5 E6 - E7 E8 E9 EA EB EC ED EE  |................|
 	00000110  EF F0 F1 F2 F3 F4 F5 F6 - F7 F8 F9 FA FB FC FD FE  |................|
 	00000120  FF 7A 0A                -                          |.z.|
@@ -10582,7 +10582,7 @@ expected-stdout:
 	000000C0  9F A0 A1 A2 A3 A4 A5 A6 - A7 A8 A9 AA AB AC AD AE  |................|
 	000000D0  AF B0 B1 B2 B3 B4 B5 B6 - B7 B8 B9 BA BB BC BD BE  |................|
 	000000E0  BF C0 C1 C2 C3 C4 C5 C6 - C7 C8 C9 CA CB CC CD CE  |................|
-	000000F0  CF D0 D1 D2 D3 D4 D5 D6 - D7 D8 D9 DA DB DC DD DE  |................|
+	000000F0  CF D0 D1 D2 D3 D4 D5 D6 - D7 D8 D9 DA DB DC DD DE  |................|
 	00000100  DF E0 E1 E2 E3 E4 E5 E6 - E7 E8 E9 EA EB EC ED EE  |................|
 	00000110  EF F0 F1 F2 F3 F4 F5 F6 - F7 F8 F9 FA FB FC FD FE  |................|
 	00000120  FF 00 7A 0A             -                          |..z.|
@@ -10702,7 +10702,7 @@ expected-stdout:
 expected-stdout:
 	1 97
 	2p 'aa': multi-character character constant
-	3 8230
+	3 8230
 	4 <'a'>
 	5 97
 	6 97
@@ -10716,7 +10716,7 @@ expected-stdout:
 	print 'a\0b\xfdz' >x
 	read -a y <x
 	set -U
-	typeset -Uui16 y
+	typeset -Uui16 y
 	print ${y[*]} .
 expected-stdout:
 	16#61 16#0 16#62 16#FD 16#7A .
@@ -10730,7 +10730,7 @@ expected-stdout:
 	print 'a\0b\xfdz' >x
 	read -a y <x
 	set -U
-	typeset -Uui16 y
+	typeset -Uui16 y
 	print ${y[*]} .
 expected-stdout:
 	16#81 16#0 16#82 16#FD 16#A9 .
--- a/expr.c
+++ b/expr.c
@@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.104 2018/06/26 21:22:21 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.105 2018/08/10 02:53:33 tg Exp $");

 #define EXPRTOK_DEFNS
 #include "exprtok.h"
@@ -885,7 +885,7 @@ static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
    unsigned int val) MKSH_A_PURE;

 /*
- * Generated from the Unicode Character Database, Version 11.0.0, by
+ * Generated from the UCD 11.0.0 by
 * MirOS: contrib/code/Snippets/eawparse,v 1.12 2017/09/06 16:05:45 tg Exp $
 */

@@ -1177,7 +1177,7 @@ mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems, unsigned int val)
 	return (0);
 }

-/* Unix column width of a wide character (Unicode code point, really) */
+/* Unix column width of a wide character (UCS code point, really) */
 int
 utf_wcwidth(unsigned int wc)
 {
--- a/funcs.c
+++ b/funcs.c
@@ -38,7 +38,7 @@
 #endif
 #endif

-__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.353 2018/01/14 01:26:49 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.354 2018/08/10 02:53:34 tg Exp $");

 #if HAVE_KILLPG
 /*
@@ -493,7 +493,7 @@ c_print(const char **wp)
 						Xput(xs, xp, '\\');
 					}
 				} else if ((unsigned int)c > 0xFF) {
-					/* generic function returned Unicode */
+					/* generic function returned UCS */
 					po.ts[utf_wctomb(po.ts, c - 0x100)] = 0;
 					c = 0;
 					do {
--- a/misc.c
+++ b/misc.c
@@ -32,7 +32,7 @@
 #include <grp.h>
 #endif

-__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.292 2018/03/17 22:46:09 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.293 2018/08/10 02:53:35 tg Exp $");

 #define KSH_CHVT_FLAG
 #ifdef MKSH_SMALL
@@ -2456,7 +2456,7 @@ getrusage(int what, struct rusage *ru)
 * and fp (put back a char) for backslash escapes,
 * assuming the first call to *fg gets the char di-
 * rectly after the backslash; return the character
- * (0..0xFF), Unicode (wc + 0x100), or -1 if no known
+ * (0..0xFF), UCS (wc + 0x100), or -1 if no known
 * escape sequence was found
 */
 int
@@ -2538,9 +2538,9 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		/**
 		 * x:	look for a hexadecimal number with up to
 		 *	two (C style: arbitrary) digits; convert
-		 *	to raw octet (C style: Unicode if >0xFF)
+		 *	to raw octet (C style: UCS if >0xFF)
 		 * u/U:	look for a hexadecimal number with up to
-		 *	four (U: eight) digits; convert to Unicode
+		 *	four (U: eight) digits; convert to UCS
 		 */
 		wc = 0;
 		n = 0;
@@ -2562,7 +2562,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		if (!n)
 			goto unknown_escape;
 		if ((cstyle && wc > 0xFF) || fc != 'x')
-			/* Unicode marker */
+			/* UCS marker */
 			wc += 0x100;
 		break;
 	case '\'':
--- a/mksh.1
+++ b/mksh.1
@@ -1,4 +1,4 @@
-.\" $MirOS: src/bin/mksh/mksh.1,v 1.458 2018/07/15 17:21:22 tg Exp $
+.\" $MirOS: src/bin/mksh/mksh.1,v 1.459 2018/08/10 02:53:36 tg Exp $
 .\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
 .\"-
 .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
@@ -77,7 +77,7 @@
 .\" with -mandoc, it might implement .Mx itself, but we want to
 .\" use our own definition. And .Dd must come *first*, always.
 .\"
-.Dd $Mdocdate: July 15 2018 $
+.Dd $Mdocdate: August 10 2018 $
 .\"
 .\" Check which macro package we use, and do other -mdoc setup.
 .\"
@@ -1047,7 +1047,7 @@ and
 .Dq Li \eu#### ,
 .Dq #
 means a hexadecimal digit, of which there may be none up to four or eight;
-these escapes translate a Unicode codepoint to UTF-8.
+these escapes translate a Universal Coded Character Set codepoint to UTF-8.
 Furthermore,
 .Dq Li \eE
 and
@@ -1083,7 +1083,7 @@ and yield raw octets; hexadecimal sequences
 greedily eat up as many hexadecimal digits
 .Dq #
 as they can and terminate with the first non-hexadecimal digit;
-these translate a Unicode codepoint to UTF-8.
+these translate a Universal Coded Character Set codepoint to UTF-8.
 The sequence
 .Dq Li \ec# ,
 where
@@ -2652,7 +2652,8 @@ as required by the standard), as that's unsafe to do.
 As a special
 .Nm mksh
 extension, numbers to the base of one are treated as either (8-bit
-transparent) ASCII or Unicode codepoints, depending on the shell's
+transparent) ASCII or Universal Coded Character Set codepoints,
+depending on the shell's
 .Ic utf8\-mode
 flag (current setting).
 The
@@ -2665,7 +2666,7 @@ instead of
 is also supported.
 Note that NUL bytes (integral value of zero) cannot be used.
 An unset or empty parameter evaluates to 0 in integer context.
-In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
+In UTF-8 mode, raw octets are mapped into the range EF80..EFFF as in
 OPTU-8, which is in the PUA and has been assigned by CSUR for this use.
 If more than one octet in ASCII mode, or a sequence of more than one
 octet not forming a valid and minimal CESU-8 sequence is passed, the
@@ -6658,7 +6659,7 @@ locale.
 .Ic utf8\-mode
 .Em must
 be disabled in POSIX mode, and it
-only supports the Unicode BMP (Basic Multilingual Plane) and maps
+only supports the BMP (Basic Multilingual Plane) of UCS and maps
 raw octets into the U+EF80..U+EFFF wide character range; compare
 .Sx Arithmetic expressions .
 The following
@@ -6679,7 +6680,7 @@ case ${KSH_VERSION:\-} in
 	esac ;;
 esac
 .Ed
-In near future, (Unicode) locale tracking will be implemented though.
+In near future, (UTF-8) locale tracking will be implemented though.
 .Pp
 See also the FAQ below.
 .Sh BUGS
--- a/sh.h
+++ b/sh.h
@@ -182,7 +182,7 @@
 #endif

 #ifdef EXTERN
-__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.864 2018/07/15 17:21:23 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.865 2018/08/10 02:53:37 tg Exp $");
 #endif
 #define MKSH_VERSION "R56 2018/07/15"

@@ -783,7 +783,7 @@ enum sh_flag {
 };

 #define Flag(f)	(shell_flags[(int)(f)])
-#define UTFMODE	Flag(FUNICODE)
+#define UTFMODE	Flag(FUNNYCODE)

 /*
 * parsing & execution environment
--- a/sh_flags.opt
+++ b/sh_flags.opt
@@ -19,7 +19,7 @@
 */

@SHFLAGS_DEFNS
-__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.5 2017/02/18 02:33:15 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.6 2018/08/10 02:53:39 tg Exp $");
@SHFLAGS_ENUMS
 #define FN(sname,cname,flags,ochar)	cname,
 #define F0(sname,cname,flags,ochar)	cname = 0,
@@ -153,7 +153,7 @@ FN("trackall", FTRACKALL, OF_ANY

 /* -U	enable UTF-8 processing (non-standard) */
 >U|
-FN("utf8-mode", FUNICODE, OF_ANY
+FN("utf8-mode", FUNNYCODE, OF_ANY

 /* -v	echo input */
 >v|
--- a/shf.c
+++ b/shf.c
@@ -27,7 +27,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.97 2018/01/14 01:28:16 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.98 2018/08/10 02:53:39 tg Exp $");

 /* flags to shf_emptybuf() */
 #define EB_READSW	0x01	/* about to switch to reading */
@@ -1304,7 +1304,7 @@ ebcdic_init(void)
 		 * and the C1 control characters other than NEL are
 		 * hopeless, but we map EBCDIC NEL to ASCII LF so we
 		 * cannot even use C1 NEL.
-		 * If ever we map to Unicode, bump the table width to
+		 * If ever we map to UCS, bump the table width to
 		 * an unsigned int, and or the raw unconverted EBCDIC
 		 * values with 0x01000000 instead.
 		 */
--- a/tree.c
+++ b/tree.c
@@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.95 2018/01/14 00:03:05 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.96 2018/08/10 02:53:39 tg Exp $");

 #define INDENT	8

@@ -808,7 +808,7 @@ vistree(char *dst, size_t sz, struct op *t)
 		c = ksh_unctrl(c);
 	} else if (UTFMODE && rtt2asc(c) > 0x7F) {
 		/* better not try to display broken multibyte chars */
-		/* also go easy on the Unicode: no U+FFFD here */
+		/* also go easy on the UCS: no U+FFFD here */
 		c = ORD('?');
 	}
 	*dst++ = c;