rename the utf8-hack option to utf8-mode (use set -U or set +U instead of the long name, anyway)

and describe the feature in less hackish terms
2008-12-04 18:11:08 +00:00
parent a19bf7253a
commit 042086e49e
8 changed files with 61 additions and 60 deletions
--- a/check.t
+++ b/check.t
@ -1,4 +1,4 @@
-# $MirOS: src/bin/mksh/check.t,v 1.249 2008/12/02 13:19:28 tg Exp $
+# $MirOS: src/bin/mksh/check.t,v 1.250 2008/12/04 18:11:03 tg Exp $
 # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
 # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
 # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
 # http://www.research.att.com/~gsf/public/ifs.sh
 expected-stdout:
-	@(#)MIRBSD KSH R36 2008/12/02
+	@(#)MIRBSD KSH R36 2008/12/04
 description:
 	Check version of shell.
 stdin:
@ -4478,21 +4478,21 @@ expected-stderr-pattern:
 ---
 name: utf8bom-3
 description:
-	Reading the UTF-8 BOM should enable the utf8-hack flag
+	Reading the UTF-8 BOM should enable the utf8-mode flag
 stdin:
-	"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi'
+	"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi'
-	"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi'
+	"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi'
 expected-stdout:
 	off
 	on
 ---
 name: utf8opt-1a
 description:
-	Check that the utf8-hack flag is not set at non-interactive startup
+	Check that the utf8-mode flag is not set at non-interactive startup
 category: !os:hpux
 env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
 stdin:
-	if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
+	if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
 		print is set
 	else
 		print is not set
@ -4502,11 +4502,11 @@ expected-stdout:
 ---
 name: utf8opt-1b
 description:
-	Check that the utf8-hack flag is not set at non-interactive startup
+	Check that the utf8-mode flag is not set at non-interactive startup
 category: os:hpux
 env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
 stdin:
-	if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
+	if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
 		print is set
 	else
 		print is not set
@ -4516,12 +4516,12 @@ expected-stdout:
 ---
 name: utf8opt-2a
 description:
-	Check that the utf8-hack flag is set at interactive startup
+	Check that the utf8-mode flag is set at interactive startup
 category: !os:hpux
 arguments: !-i!
 env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
 stdin:
-	if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
+	if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
 		print is set
 	else
 		print is not set
@ -4533,12 +4533,12 @@ expected-stderr-pattern:
 ---
 name: utf8opt-2b
 description:
-	Check that the utf8-hack flag is set at interactive startup
+	Check that the utf8-mode flag is set at interactive startup
 category: os:hpux
 arguments: !-i!
 env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
 stdin:
-	if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
+	if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
 		print is set
 	else
 		print is not set
@ -4804,7 +4804,7 @@ name: integer-base-one-1
 description:
 	check if the use of fake integer base 1 works
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -Uui16 i0=1#<23> i1=1#€
 	typeset -i1 o0a=64
 	typeset -i1 o1a=0x263A
@ -4833,7 +4833,7 @@ name: integer-base-one-2a
 description:
 	check if the use of fake integer base 1 stops at correct characters
 stdin:
-	set -o utf8-hack
+	set -U
 	integer x=1#foo
 	print /$x/
 expected-stderr-pattern:
@ -4844,7 +4844,7 @@ name: integer-base-one-2b
 description:
 	check if the use of fake integer base 1 stops at correct characters
 stdin:
-	set -o utf8-hack
+	set -U
 	integer x=1#<23><>
 	print /$x/
 expected-stderr-pattern:
@ -4855,7 +4855,7 @@ name: integer-base-one-2c1
 description:
 	check if the use of fake integer base 1 stops at correct characters
 stdin:
-	set -o utf8-hack
+	set -U
 	integer x=1#…
 	print /$x/
 expected-stdout:
@ -4865,7 +4865,7 @@ name: integer-base-one-2c2
 description:
 	check if the use of fake integer base 1 stops at correct characters
 stdin:
-	set +o utf8-hack
+	set +U
 	integer x=1#…
 	print /$x/
 expected-stderr-pattern:
@ -4876,7 +4876,7 @@ name: integer-base-one-2d1
 description:
 	check if the use of fake integer base 1 handles octets okay
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23>
 	print /$x/	# invalid utf-8
 expected-stdout:
@ -4886,7 +4886,7 @@ name: integer-base-one-2d2
 description:
 	check if the use of fake integer base 1 handles octets
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23>
 	print /$x/	# invalid 2-byte
 expected-stdout:
@ -4896,7 +4896,7 @@ name: integer-base-one-2d3
 description:
 	check if the use of fake integer base 1 handles octets
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23>
 	print /$x/	# invalid 2-byte
 expected-stdout:
@ -4906,7 +4906,7 @@ name: integer-base-one-2d4
 description:
 	check if the use of fake integer base 1 stops at invalid input
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23><><EFBFBD>
 	print /$x/	# invalid 3-byte
 expected-stderr-pattern:
@ -4917,7 +4917,7 @@ name: integer-base-one-2d5
 description:
 	check if the use of fake integer base 1 stops at invalid input
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23><>
 	print /$x/	# non-minimalistic
 expected-stderr-pattern:
@ -4928,7 +4928,7 @@ name: integer-base-one-2d6
 description:
 	check if the use of fake integer base 1 stops at invalid input
 stdin:
-	set -o utf8-hack
+	set -U
 	typeset -i16 x=1#<23><><EFBFBD>
 	print /$x/	# non-minimalistic
 expected-stderr-pattern:
@ -5006,7 +5006,7 @@ name: integer-base-one-3b
 description:
 	some sample code for hexdumping Unicode
 stdin:
-	set -o utf8-hack
+	set -U
 	{
 		print 'Hello, World!\\\nこんにちは！'
 		typeset -Uui16 i=0x100
--- a/edit.c
+++ b/edit.c
@ -5,7 +5,7 @@
 #include "sh.h"
-__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.146 2008/11/15 09:00:18 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.147 2008/12/04 18:11:04 tg Exp $");
 /* tty driver characters we are interested in */
 typedef struct {
@ -753,7 +753,7 @@ utf_widthadj(const char *src, const char **dst)
 	unsigned int wc;
 	int width;
-	if (!Flag(FUTFHACK) || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
+	if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
 	    wc == 0)
 		len = width = 1;
 	else
@ -771,7 +771,7 @@ utf_mbswidth(const char *s)
 	unsigned int wc;
 	int width = 0, cw;
-	if (!Flag(FUTFHACK))
+	if (!UTFMODE)
 		return (strlen(s));
 	while (*s)
@ -1385,7 +1385,7 @@ x_e_getmbc(char *sbuf)
 	buf[pos++] = c = x_e_getc();
 	if (c == -1)
 		return (-1);
-	if (Flag(FUTFHACK)) {
+	if (UTFMODE) {
 		if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
 			c = x_e_getc();
 			if (c == -1)
@ -1504,7 +1504,7 @@ x_insert(int c)
 		x_e_putc2(7);
 		return KSTD;
 	}
-	if (Flag(FUTFHACK)) {
+	if (UTFMODE) {
 		if (((c & 0xC0) == 0x80) && left) {
 			str[pos++] = c;
 			if (!--left) {
@ -1792,7 +1792,7 @@ x_fword(int move)
 static void
 x_goto(char *cp)
 {
-	if (Flag(FUTFHACK))
+	if (UTFMODE)
 		while ((cp > xbuf) && ((*cp & 0xC0) == 0x80))
 			--cp;
 	if (cp < xbp || cp >= utf_skipcols(xbp, x_displen)) {
@ -1814,7 +1814,7 @@ x_bs3(char **p)
 	int i;
 	(*p)--;
-	if (Flag(FUTFHACK))
+	if (UTFMODE)
 		while (((unsigned char)**p & 0xC0) == 0x80)
 			(*p)--;
@ -1837,7 +1837,7 @@ x_size2(char *cp, char **dcp)
 {
 	int c = *(unsigned char *)cp;
-	if (Flag(FUTFHACK) && (c > 0x7F))
+	if (UTFMODE && (c > 0x7F))
 		return (utf_widthadj(cp, (const char **)dcp));
 	if (dcp)
 		*dcp = cp + 1;
@ -1876,7 +1876,7 @@ x_zotc3(char **cp)
 {
 	unsigned char c = **(unsigned char **)cp;
-	if (c == 0xC2 && Flag(FUTFHACK)) {
+	if (c == 0xC2 && UTFMODE) {
 		unsigned char c2 = ((unsigned char *)*cp)[1];
 		if (c2 >= 0x80 && c2 < 0xA0) {
@ -2911,7 +2911,7 @@ x_adjust(void)
 	 */
 	if ((xbp = xcp - (x_displen / 2)) < xbuf)
 		xbp = xbuf;
-	if (Flag(FUTFHACK))
+	if (UTFMODE)
 		while ((xbp > xbuf) && ((*xbp & 0xC0) == 0x80))
 			--xbp;
 	xlp_valid = false;
@ -2951,7 +2951,7 @@ x_e_putc2(int c)
 	if (c == '\r' || c == '\n')
 		x_col = 0;
 	if (x_col < xx_cols) {
-		if (Flag(FUTFHACK) && (c > 0x7F)) {
+		if (UTFMODE && (c > 0x7F)) {
 			char utf_tmp[3];
 			size_t x;
@ -2992,7 +2992,7 @@ x_e_putc3(const char **cp)
 	if (c == '\r' || c == '\n')
 		x_col = 0;
 	if (x_col < xx_cols) {
-		if (Flag(FUTFHACK) && (c > 0x7F)) {
+		if (UTFMODE && (c > 0x7F)) {
 			char *cp2;
 			width = utf_widthadj(*cp, (const char **)&cp2);
--- a/lex.c
+++ b/lex.c
@ -2,7 +2,7 @@
 #include "sh.h"
-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.77 2008/12/02 12:39:37 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.78 2008/12/04 18:11:05 tg Exp $");
 /*
 * states while lexing word
@ -1164,7 +1164,7 @@ getsc__(void)
 		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
 		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
 			s->str += 2;
-			Flag(FUTFHACK) = 1;
+			UTFMODE = 1;
 			goto getsc_again;
 		}
 	}
@ -1332,7 +1332,7 @@ dopprompt(const char *cp, int ntruncate, int doprint)
 				columns--;
 		} else if (*cp == delimiter)
 			indelimit = !indelimit;
-		else if (Flag(FUTFHACK) && ((unsigned char)*cp > 0x7F)) {
+		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
 			const char *cp2;
 			columns += utf_widthadj(cp, &cp2);
 			if (doprint && (indelimit ||
--- a/main.c
+++ b/main.c
@ -13,7 +13,7 @@
 #include <locale.h>
 #endif
-__RCSID("$MirOS: src/bin/mksh/main.c,v 1.114 2008/11/30 10:33:39 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/main.c,v 1.115 2008/12/04 18:11:06 tg Exp $");
 extern char **environ;
@ -311,7 +311,7 @@ main(int argc, const char *argv[])
 #define isuc(x)	(((x) != NULL) && \
 		    (stristr((x), "UTF-8") || stristr((x), "utf8")))
 		/* Check if we're in a UTF-8 locale */
-		if (!Flag(FUTFHACK)) {
+		if (!UTFMODE) {
 			const char *ccp;
 #if HAVE_SETLOCALE_CTYPE
@ -328,11 +328,11 @@ main(int argc, const char *argv[])
 					ccp = getenv("LANG");
 			}
 #endif
-			Flag(FUTFHACK) = isuc(ccp);
+			UTFMODE = isuc(ccp);
 		}
 #undef isuc
 #else
-		Flag(FUTFHACK) = 1;
+		UTFMODE = 1;
 #endif
 		x_init();
 	}
--- a/misc.c
+++ b/misc.c
@ -6,7 +6,7 @@
 #include <grp.h>
 #endif
-__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.91 2008/11/12 00:54:50 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.92 2008/12/04 18:11:06 tg Exp $");
 #undef USE_CHVT
 #if defined(TIOCSCTTY) && !defined(MKSH_SMALL)
@ -111,7 +111,7 @@ const struct shoption options[] = {
 	{ "restricted",	'r',	    OF_CMDLINE },
 	{ "stdin",	's',	    OF_CMDLINE }, /* pseudo non-standard */
 	{ "trackall",	'h',		OF_ANY },
-	{ "utf8-hack",	'U',		OF_ANY }, /* non-standard */
+	{ "utf8-mode",	'U',		OF_ANY }, /* non-standard */
 	{ "verbose",	'v',		OF_ANY },
 #ifndef MKSH_NOVI
 	{ "vi",		  0,		OF_ANY },
--- a/mksh.1
+++ b/mksh.1
@ -1,4 +1,4 @@
-.\" $MirOS: src/bin/mksh/mksh.1,v 1.144 2008/10/24 21:27:20 tg Exp $
+.\" $MirOS: src/bin/mksh/mksh.1,v 1.145 2008/12/04 18:11:07 tg Exp $
 .\" $OpenBSD: ksh.1,v 1.122 2008/05/17 23:31:52 sobrado Exp $
 .\"-
 .\" Try to make GNU groff and AT&T nroff more compatible
@ -30,7 +30,7 @@
 .el .xD \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8
 ..
 .\"-
-.Dd $Mdocdate: October 24 2008 $
+.Dd $Mdocdate: December 4 2008 $
 .Dt MKSH 1
 .Os MirBSD
 .Sh NAME
@ -2159,7 +2159,7 @@ As a special
 .Nm mksh
 extension, numbers to the base of one are treated as either (8-bit
 transparent) ASCII or Unicode codepoints, depending on the shell's
-.Ic utf8\-hack
+.Ic utf8\-mode
 flag (current setting).
 Note that NUL bytes (integral value of zero) cannot be used.
 In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
@ -3468,7 +3468,7 @@ the positional parameters (or to array
 if
 .Fl A
 is used).
-.It Fl U \*(Ba Ic utf8\-hack
+.It Fl U \*(Ba Ic utf8\-mode
 Enable UTF-8 support in the
 .Sx Emacs editing mode
 and internal string handling functions.
@ -3486,7 +3486,7 @@ and at least one of these returns something that matches
 .Dq UTF\-8
 or
 .Dq utf8 ,
-or if the input begins with a Byte Order Mark.
+or if the input begins with a UTF-8 Byte Order Mark.
 .It Fl u \*(Ba Ic nounset
 Referencing of an unset parameter is treated as an error, unless one of the
 .Ql \- ,
@ -5559,7 +5559,7 @@ and many other persons, and is currently maintained by
 .An Thorsten Glaser Aq tg@mirbsd.de .
 .Sh BUGS
 This document attempts to describe
-.Nm mksh\ R36
+.Nm mksh\ R37
 and up,
 compiled without any options impacting functionality, such as
 .Dv MKSH_SMALL ,
--- a/sh.h
+++ b/sh.h
@ -103,9 +103,9 @@
 #define __SCCSID(x)	__IDSTRING(sccsid,x)
 #ifdef EXTERN
-__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.264 2008/12/02 13:20:39 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.265 2008/12/04 18:11:08 tg Exp $");
 #endif
-#define MKSH_VERSION "R36 2008/12/02"
+#define MKSH_VERSION "R36 2008/12/04"
 #ifndef MKSH_INCLUDES_ONLY
@ -341,7 +341,7 @@ char *ucstrstr(char *, const char *);
 #define utf_ptradjx(src, dst) do {					\
 	size_t utf_ptradjx_len;						\
 									\
-	if (!Flag(FUTFHACK) ||						\
+	if (!UTFMODE ||							\
 	    *(const unsigned char *)(src) < 0xC2 ||			\
 	    (utf_ptradjx_len = utf_mbtowc(NULL, (src))) == (size_t)-1)	\
 		utf_ptradjx_len = 1;					\
@ -489,7 +489,7 @@ enum sh_flag {
 	FRESTRICTED,	/* -r: restricted shell */
 	FSTDIN,		/* -s: (invocation) parse stdin */
 	FTRACKALL,	/* -h: create tracked aliases for all commands */
-	FUTFHACK,	/* -U: utf-8 hack for command line editing */
+	FUTFMODE,	/* -U: enable utf-8 processing */
 	FVERBOSE,	/* -v: echo input */
 #ifndef MKSH_NOVI
 	FVI,		/* vi command editing */
@ -503,6 +503,7 @@ enum sh_flag {
 };
 #define Flag(f)	(shell_flags[(int)(f)])
+#define UTFMODE	Flag(FUTFMODE)
 EXTERN char shell_flags[FNFLAGS];
@ -1264,7 +1265,7 @@ void afree(void *, PArea);		/* can take NULL */
 void x_init(void);
 int x_read(char *, size_t);
 int x_bind(const char *, const char *, int, int);
-/* UTF-8 hack stuff */
+/* UTF-8 stuff */
 size_t utf_mbtowc(unsigned int *, const char *);
 size_t utf_wctomb(char *, unsigned int);
 int utf_widthadj(const char *, const char **);
--- a/var.c
+++ b/var.c
@ -2,7 +2,7 @@
 #include "sh.h"
-__RCSID("$MirOS: src/bin/mksh/var.c,v 1.63 2008/11/30 10:33:40 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/var.c,v 1.64 2008/12/04 18:11:08 tg Exp $");
 /*
 * Variables
@ -303,7 +303,7 @@ str_val(struct tbl *vp)
 			*(s = strbuf) = '1';
 			s[1] = '#';
-			if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
+			if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
 				s[2] = n & 0xFF;
 			else
 				sz = utf_wctomb(s + 2, n);
@ -455,7 +455,7 @@ getint(struct tbl *vp, long int *nump, bool arith)
 			if (base == 1) {
 				unsigned int wc;
-				if (!Flag(FUTFHACK))
+				if (!UTFMODE)
 					wc = *(unsigned char *)s;
 				else if (utf_mbtowc(&wc, s) == (size_t)-1)
 					wc = 0xEF00 + *(unsigned char *)s;