From 042086e49e18469086f0c6144e8297863cbb59d2 Mon Sep 17 00:00:00 2001 From: tg Date: Thu, 4 Dec 2008 18:11:08 +0000 Subject: [PATCH] rename utf8-hack to utf8-mode (use set -U or set +U instead, anyway) and announce less hackish things --- check.t | 50 +++++++++++++++++++++++++------------------------- edit.c | 24 ++++++++++++------------ lex.c | 6 +++--- main.c | 8 ++++---- misc.c | 4 ++-- mksh.1 | 12 ++++++------ sh.h | 11 ++++++----- var.c | 6 +++--- 8 files changed, 61 insertions(+), 60 deletions(-) diff --git a/check.t b/check.t index 2e699fa..9f4c699 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.249 2008/12/02 13:19:28 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.250 2008/12/04 18:11:03 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -7,7 +7,7 @@ # http://www.research.att.com/~gsf/public/ifs.sh expected-stdout: - @(#)MIRBSD KSH R36 2008/12/02 + @(#)MIRBSD KSH R36 2008/12/04 description: Check version of shell. stdin: @@ -4478,21 +4478,21 @@ expected-stderr-pattern: --- name: utf8bom-3 description: - Reading the UTF-8 BOM should enable the utf8-hack flag + Reading the UTF-8 BOM should enable the utf8-mode flag stdin: - "$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi' - "$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi' + "$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi' + "$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi' expected-stdout: off on --- name: utf8opt-1a description: - Check that the utf8-hack flag is not set at non-interactive startup + Check that the utf8-mode flag is not set at non-interactive startup category: !os:hpux env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8! stdin: - if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then + if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print is set else print is not set @@ -4502,11 +4502,11 @@ expected-stdout: --- name: utf8opt-1b description: - Check that the utf8-hack flag is not set at non-interactive startup + Check that the utf8-mode flag is not set at non-interactive startup category: os:hpux env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8! stdin: - if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then + if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print is set else print is not set @@ -4516,12 +4516,12 @@ expected-stdout: --- name: utf8opt-2a description: - Check that the utf8-hack flag is set at interactive startup + Check that the utf8-mode flag is set at interactive startup category: !os:hpux arguments: !-i! env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8! stdin: - if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then + if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print is set else print is not set @@ -4533,12 +4533,12 @@ expected-stderr-pattern: --- name: utf8opt-2b description: - Check that the utf8-hack flag is set at interactive startup + Check that the utf8-mode flag is set at interactive startup category: os:hpux arguments: !-i! env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8! stdin: - if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then + if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print is set else print is not set @@ -4804,7 +4804,7 @@ name: integer-base-one-1 description: check if the use of fake integer base 1 works stdin: - set -o utf8-hack + set -U typeset -Uui16 i0=1# i1=1#€ typeset -i1 o0a=64 typeset -i1 o1a=0x263A @@ -4833,7 +4833,7 @@ name: integer-base-one-2a description: check if the use of fake integer base 1 stops at correct characters stdin: - set -o utf8-hack + set -U integer x=1#foo print /$x/ expected-stderr-pattern: @@ -4844,7 +4844,7 @@ name: integer-base-one-2b description: check if the use of fake integer base 1 stops at correct characters stdin: - set -o utf8-hack + set -U integer x=1# print /$x/ expected-stderr-pattern: @@ -4855,7 +4855,7 @@ name: integer-base-one-2c1 description: check if the use of fake integer base 1 stops at correct characters stdin: - set -o utf8-hack + set -U integer x=1#… print /$x/ expected-stdout: @@ -4865,7 +4865,7 @@ name: integer-base-one-2c2 description: check if the use of fake integer base 1 stops at correct characters stdin: - set +o utf8-hack + set +U integer x=1#… print /$x/ expected-stderr-pattern: @@ -4876,7 +4876,7 @@ name: integer-base-one-2d1 description: check if the use of fake integer base 1 handles octets okay stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # invalid utf-8 expected-stdout: @@ -4886,7 +4886,7 @@ name: integer-base-one-2d2 description: check if the use of fake integer base 1 handles octets stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # invalid 2-byte expected-stdout: @@ -4896,7 +4896,7 @@ name: integer-base-one-2d3 description: check if the use of fake integer base 1 handles octets stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # invalid 2-byte expected-stdout: @@ -4906,7 +4906,7 @@ name: integer-base-one-2d4 description: check if the use of fake integer base 1 stops at invalid input stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # invalid 3-byte expected-stderr-pattern: @@ -4917,7 +4917,7 @@ name: integer-base-one-2d5 description: check if the use of fake integer base 1 stops at invalid input stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # non-minimalistic expected-stderr-pattern: @@ -4928,7 +4928,7 @@ name: integer-base-one-2d6 description: check if the use of fake integer base 1 stops at invalid input stdin: - set -o utf8-hack + set -U typeset -i16 x=1# print /$x/ # non-minimalistic expected-stderr-pattern: @@ -5006,7 +5006,7 @@ name: integer-base-one-3b description: some sample code for hexdumping Unicode stdin: - set -o utf8-hack + set -U { print 'Hello, World!\\\nこんにちは!' typeset -Uui16 i=0x100 diff --git a/edit.c b/edit.c index 9502d3e..ecb493a 100644 --- a/edit.c +++ b/edit.c @@ -5,7 +5,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.146 2008/11/15 09:00:18 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.147 2008/12/04 18:11:04 tg Exp $"); /* tty driver characters we are interested in */ typedef struct { @@ -753,7 +753,7 @@ utf_widthadj(const char *src, const char **dst) unsigned int wc; int width; - if (!Flag(FUTFHACK) || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || + if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || wc == 0) len = width = 1; else @@ -771,7 +771,7 @@ utf_mbswidth(const char *s) unsigned int wc; int width = 0, cw; - if (!Flag(FUTFHACK)) + if (!UTFMODE) return (strlen(s)); while (*s) @@ -1385,7 +1385,7 @@ x_e_getmbc(char *sbuf) buf[pos++] = c = x_e_getc(); if (c == -1) return (-1); - if (Flag(FUTFHACK)) { + if (UTFMODE) { if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) { c = x_e_getc(); if (c == -1) @@ -1504,7 +1504,7 @@ x_insert(int c) x_e_putc2(7); return KSTD; } - if (Flag(FUTFHACK)) { + if (UTFMODE) { if (((c & 0xC0) == 0x80) && left) { str[pos++] = c; if (!--left) { @@ -1792,7 +1792,7 @@ x_fword(int move) static void x_goto(char *cp) { - if (Flag(FUTFHACK)) + if (UTFMODE) while ((cp > xbuf) && ((*cp & 0xC0) == 0x80)) --cp; if (cp < xbp || cp >= utf_skipcols(xbp, x_displen)) { @@ -1814,7 +1814,7 @@ x_bs3(char **p) int i; (*p)--; - if (Flag(FUTFHACK)) + if (UTFMODE) while (((unsigned char)**p & 0xC0) == 0x80) (*p)--; @@ -1837,7 +1837,7 @@ x_size2(char *cp, char **dcp) { int c = *(unsigned char *)cp; - if (Flag(FUTFHACK) && (c > 0x7F)) + if (UTFMODE && (c > 0x7F)) return (utf_widthadj(cp, (const char **)dcp)); if (dcp) *dcp = cp + 1; @@ -1876,7 +1876,7 @@ x_zotc3(char **cp) { unsigned char c = **(unsigned char **)cp; - if (c == 0xC2 && Flag(FUTFHACK)) { + if (c == 0xC2 && UTFMODE) { unsigned char c2 = ((unsigned char *)*cp)[1]; if (c2 >= 0x80 && c2 < 0xA0) { @@ -2911,7 +2911,7 @@ x_adjust(void) */ if ((xbp = xcp - (x_displen / 2)) < xbuf) xbp = xbuf; - if (Flag(FUTFHACK)) + if (UTFMODE) while ((xbp > xbuf) && ((*xbp & 0xC0) == 0x80)) --xbp; xlp_valid = false; @@ -2951,7 +2951,7 @@ x_e_putc2(int c) if (c == '\r' || c == '\n') x_col = 0; if (x_col < xx_cols) { - if (Flag(FUTFHACK) && (c > 0x7F)) { + if (UTFMODE && (c > 0x7F)) { char utf_tmp[3]; size_t x; @@ -2992,7 +2992,7 @@ x_e_putc3(const char **cp) if (c == '\r' || c == '\n') x_col = 0; if (x_col < xx_cols) { - if (Flag(FUTFHACK) && (c > 0x7F)) { + if (UTFMODE && (c > 0x7F)) { char *cp2; width = utf_widthadj(*cp, (const char **)&cp2); diff --git a/lex.c b/lex.c index c516880..8dc9ba8 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.77 2008/12/02 12:39:37 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.78 2008/12/04 18:11:05 tg Exp $"); /* * states while lexing word @@ -1164,7 +1164,7 @@ getsc__(void) (((const unsigned char *)(s->str))[0] == 0xBB) && (((const unsigned char *)(s->str))[1] == 0xBF)) { s->str += 2; - Flag(FUTFHACK) = 1; + UTFMODE = 1; goto getsc_again; } } @@ -1332,7 +1332,7 @@ dopprompt(const char *cp, int ntruncate, int doprint) columns--; } else if (*cp == delimiter) indelimit = !indelimit; - else if (Flag(FUTFHACK) && ((unsigned char)*cp > 0x7F)) { + else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { const char *cp2; columns += utf_widthadj(cp, &cp2); if (doprint && (indelimit || diff --git a/main.c b/main.c index 3462e4d..7fb9d60 100644 --- a/main.c +++ b/main.c @@ -13,7 +13,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.114 2008/11/30 10:33:39 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.115 2008/12/04 18:11:06 tg Exp $"); extern char **environ; @@ -311,7 +311,7 @@ main(int argc, const char *argv[]) #define isuc(x) (((x) != NULL) && \ (stristr((x), "UTF-8") || stristr((x), "utf8"))) /* Check if we're in a UTF-8 locale */ - if (!Flag(FUTFHACK)) { + if (!UTFMODE) { const char *ccp; #if HAVE_SETLOCALE_CTYPE @@ -328,11 +328,11 @@ main(int argc, const char *argv[]) ccp = getenv("LANG"); } #endif - Flag(FUTFHACK) = isuc(ccp); + UTFMODE = isuc(ccp); } #undef isuc #else - Flag(FUTFHACK) = 1; + UTFMODE = 1; #endif x_init(); } diff --git a/misc.c b/misc.c index ffbd682..49c23ab 100644 --- a/misc.c +++ b/misc.c @@ -6,7 +6,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.91 2008/11/12 00:54:50 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.92 2008/12/04 18:11:06 tg Exp $"); #undef USE_CHVT #if defined(TIOCSCTTY) && !defined(MKSH_SMALL) @@ -111,7 +111,7 @@ const struct shoption options[] = { { "restricted", 'r', OF_CMDLINE }, { "stdin", 's', OF_CMDLINE }, /* pseudo non-standard */ { "trackall", 'h', OF_ANY }, - { "utf8-hack", 'U', OF_ANY }, /* non-standard */ + { "utf8-mode", 'U', OF_ANY }, /* non-standard */ { "verbose", 'v', OF_ANY }, #ifndef MKSH_NOVI { "vi", 0, OF_ANY }, diff --git a/mksh.1 b/mksh.1 index 2d4b6fa..27923a3 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,4 +1,4 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.144 2008/10/24 21:27:20 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.145 2008/12/04 18:11:07 tg Exp $ .\" $OpenBSD: ksh.1,v 1.122 2008/05/17 23:31:52 sobrado Exp $ .\"- .\" Try to make GNU groff and AT&T nroff more compatible @@ -30,7 +30,7 @@ .el .xD \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 .. .\"- -.Dd $Mdocdate: October 24 2008 $ +.Dd $Mdocdate: December 4 2008 $ .Dt MKSH 1 .Os MirBSD .Sh NAME @@ -2159,7 +2159,7 @@ As a special .Nm mksh extension, numbers to the base of one are treated as either (8-bit transparent) ASCII or Unicode codepoints, depending on the shell's -.Ic utf8\-hack +.Ic utf8\-mode flag (current setting). Note that NUL bytes (integral value of zero) cannot be used. In Unicode mode, raw octets are mapped into the range EF80..EFFF as in @@ -3468,7 +3468,7 @@ the positional parameters (or to array if .Fl A is used). -.It Fl U \*(Ba Ic utf8\-hack +.It Fl U \*(Ba Ic utf8\-mode Enable UTF-8 support in the .Sx Emacs editing mode and internal string handling functions. @@ -3486,7 +3486,7 @@ and at least one of these returns something that matches .Dq UTF\-8 or .Dq utf8 , -or if the input begins with a Byte Order Mark. +or if the input begins with a UTF-8 Byte Order Mark. .It Fl u \*(Ba Ic nounset Referencing of an unset parameter is treated as an error, unless one of the .Ql \- , @@ -5559,7 +5559,7 @@ and many other persons, and is currently maintained by .An Thorsten Glaser Aq tg@mirbsd.de . .Sh BUGS This document attempts to describe -.Nm mksh\ R36 +.Nm mksh\ R37 and up, compiled without any options impacting functionality, such as .Dv MKSH_SMALL , diff --git a/sh.h b/sh.h index 23cffbe..58778f4 100644 --- a/sh.h +++ b/sh.h @@ -103,9 +103,9 @@ #define __SCCSID(x) __IDSTRING(sccsid,x) #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.264 2008/12/02 13:20:39 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.265 2008/12/04 18:11:08 tg Exp $"); #endif -#define MKSH_VERSION "R36 2008/12/02" +#define MKSH_VERSION "R36 2008/12/04" #ifndef MKSH_INCLUDES_ONLY @@ -341,7 +341,7 @@ char *ucstrstr(char *, const char *); #define utf_ptradjx(src, dst) do { \ size_t utf_ptradjx_len; \ \ - if (!Flag(FUTFHACK) || \ + if (!UTFMODE || \ *(const unsigned char *)(src) < 0xC2 || \ (utf_ptradjx_len = utf_mbtowc(NULL, (src))) == (size_t)-1) \ utf_ptradjx_len = 1; \ @@ -489,7 +489,7 @@ enum sh_flag { FRESTRICTED, /* -r: restricted shell */ FSTDIN, /* -s: (invocation) parse stdin */ FTRACKALL, /* -h: create tracked aliases for all commands */ - FUTFHACK, /* -U: utf-8 hack for command line editing */ + FUTFMODE, /* -U: enable utf-8 processing */ FVERBOSE, /* -v: echo input */ #ifndef MKSH_NOVI FVI, /* vi command editing */ @@ -503,6 +503,7 @@ enum sh_flag { }; #define Flag(f) (shell_flags[(int)(f)]) +#define UTFMODE Flag(FUTFMODE) EXTERN char shell_flags[FNFLAGS]; @@ -1264,7 +1265,7 @@ void afree(void *, PArea); /* can take NULL */ void x_init(void); int x_read(char *, size_t); int x_bind(const char *, const char *, int, int); -/* UTF-8 hack stuff */ +/* UTF-8 stuff */ size_t utf_mbtowc(unsigned int *, const char *); size_t utf_wctomb(char *, unsigned int); int utf_widthadj(const char *, const char **); diff --git a/var.c b/var.c index 8e540c6..af6f733 100644 --- a/var.c +++ b/var.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.63 2008/11/30 10:33:40 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.64 2008/12/04 18:11:08 tg Exp $"); /* * Variables @@ -303,7 +303,7 @@ str_val(struct tbl *vp) *(s = strbuf) = '1'; s[1] = '#'; - if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80)) + if (!UTFMODE || ((n & 0xFF80) == 0xEF80)) s[2] = n & 0xFF; else sz = utf_wctomb(s + 2, n); @@ -455,7 +455,7 @@ getint(struct tbl *vp, long int *nump, bool arith) if (base == 1) { unsigned int wc; - if (!Flag(FUTFHACK)) + if (!UTFMODE) wc = *(unsigned char *)s; else if (utf_mbtowc(&wc, s) == (size_t)-1) wc = 0xEF00 + *(unsigned char *)s;