rename the utf8-hack shell option to utf8-mode (use set -U or set +U instead of the long option name, anyway)

and announce less hackish things
This commit is contained in:
tg 2008-12-04 18:11:08 +00:00
parent a19bf7253a
commit 042086e49e
8 changed files with 61 additions and 60 deletions

50
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.249 2008/12/02 13:19:28 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.250 2008/12/04 18:11:03 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R36 2008/12/02
@(#)MIRBSD KSH R36 2008/12/04
description:
Check version of shell.
stdin:
@ -4478,21 +4478,21 @@ expected-stderr-pattern:
---
name: utf8bom-3
description:
Reading the UTF-8 BOM should enable the utf8-hack flag
Reading the UTF-8 BOM should enable the utf8-mode flag
stdin:
"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi'
"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then print on; else print off; fi'
"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi'
"$__progname" -c ':; if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then print on; else print off; fi'
expected-stdout:
off
on
---
name: utf8opt-1a
description:
Check that the utf8-hack flag is not set at non-interactive startup
Check that the utf8-mode flag is not set at non-interactive startup
category: !os:hpux
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
print is set
else
print is not set
@ -4502,11 +4502,11 @@ expected-stdout:
---
name: utf8opt-1b
description:
Check that the utf8-hack flag is not set at non-interactive startup
Check that the utf8-mode flag is not set at non-interactive startup
category: os:hpux
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
print is set
else
print is not set
@ -4516,12 +4516,12 @@ expected-stdout:
---
name: utf8opt-2a
description:
Check that the utf8-hack flag is set at interactive startup
Check that the utf8-mode flag is set at interactive startup
category: !os:hpux
arguments: !-i!
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
print is set
else
print is not set
@ -4533,12 +4533,12 @@ expected-stderr-pattern:
---
name: utf8opt-2b
description:
Check that the utf8-hack flag is set at interactive startup
Check that the utf8-mode flag is set at interactive startup
category: os:hpux
arguments: !-i!
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
if [[ $(set +o) = *@(-o utf8-mode)@(| *) ]]; then
print is set
else
print is not set
@ -4804,7 +4804,7 @@ name: integer-base-one-1
description:
check if the use of fake integer base 1 works
stdin:
set -o utf8-hack
set -U
typeset -Uui16 i0=1#ï i1=1#€
typeset -i1 o0a=64
typeset -i1 o1a=0x263A
@ -4833,7 +4833,7 @@ name: integer-base-one-2a
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
set -U
integer x=1#foo
print /$x/
expected-stderr-pattern:
@ -4844,7 +4844,7 @@ name: integer-base-one-2b
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
set -U
integer x=1#À€
print /$x/
expected-stderr-pattern:
@ -4855,7 +4855,7 @@ name: integer-base-one-2c1
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
set -U
integer x=1#…
print /$x/
expected-stdout:
@ -4865,7 +4865,7 @@ name: integer-base-one-2c2
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set +o utf8-hack
set +U
integer x=1#…
print /$x/
expected-stderr-pattern:
@ -4876,7 +4876,7 @@ name: integer-base-one-2d1
description:
check if the use of fake integer base 1 handles octets okay
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1#ÿ
print /$x/ # invalid utf-8
expected-stdout:
@ -4886,7 +4886,7 @@ name: integer-base-one-2d2
description:
check if the use of fake integer base 1 handles octets
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1
print /$x/ # invalid 2-byte
expected-stdout:
@ -4896,7 +4896,7 @@ name: integer-base-one-2d3
description:
check if the use of fake integer base 1 handles octets
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1
print /$x/ # invalid 2-byte
expected-stdout:
@ -4906,7 +4906,7 @@ name: integer-base-one-2d4
description:
check if the use of fake integer base 1 stops at invalid input
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1#ï¿À
print /$x/ # invalid 3-byte
expected-stderr-pattern:
@ -4917,7 +4917,7 @@ name: integer-base-one-2d5
description:
check if the use of fake integer base 1 stops at invalid input
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1#À€
print /$x/ # non-minimalistic
expected-stderr-pattern:
@ -4928,7 +4928,7 @@ name: integer-base-one-2d6
description:
check if the use of fake integer base 1 stops at invalid input
stdin:
set -o utf8-hack
set -U
typeset -i16 x=1#à€€
print /$x/ # non-minimalistic
expected-stderr-pattern:
@ -5006,7 +5006,7 @@ name: integer-base-one-3b
description:
some sample code for hexdumping Unicode
stdin:
set -o utf8-hack
set -U
{
print 'Hello, World!\\\nこんにちは！'
typeset -Uui16 i=0x100

24
edit.c
View File

@ -5,7 +5,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.146 2008/11/15 09:00:18 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.147 2008/12/04 18:11:04 tg Exp $");
/* tty driver characters we are interested in */
typedef struct {
@ -753,7 +753,7 @@ utf_widthadj(const char *src, const char **dst)
unsigned int wc;
int width;
if (!Flag(FUTFHACK) || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
wc == 0)
len = width = 1;
else
@ -771,7 +771,7 @@ utf_mbswidth(const char *s)
unsigned int wc;
int width = 0, cw;
if (!Flag(FUTFHACK))
if (!UTFMODE)
return (strlen(s));
while (*s)
@ -1385,7 +1385,7 @@ x_e_getmbc(char *sbuf)
buf[pos++] = c = x_e_getc();
if (c == -1)
return (-1);
if (Flag(FUTFHACK)) {
if (UTFMODE) {
if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
c = x_e_getc();
if (c == -1)
@ -1504,7 +1504,7 @@ x_insert(int c)
x_e_putc2(7);
return KSTD;
}
if (Flag(FUTFHACK)) {
if (UTFMODE) {
if (((c & 0xC0) == 0x80) && left) {
str[pos++] = c;
if (!--left) {
@ -1792,7 +1792,7 @@ x_fword(int move)
static void
x_goto(char *cp)
{
if (Flag(FUTFHACK))
if (UTFMODE)
while ((cp > xbuf) && ((*cp & 0xC0) == 0x80))
--cp;
if (cp < xbp || cp >= utf_skipcols(xbp, x_displen)) {
@ -1814,7 +1814,7 @@ x_bs3(char **p)
int i;
(*p)--;
if (Flag(FUTFHACK))
if (UTFMODE)
while (((unsigned char)**p & 0xC0) == 0x80)
(*p)--;
@ -1837,7 +1837,7 @@ x_size2(char *cp, char **dcp)
{
int c = *(unsigned char *)cp;
if (Flag(FUTFHACK) && (c > 0x7F))
if (UTFMODE && (c > 0x7F))
return (utf_widthadj(cp, (const char **)dcp));
if (dcp)
*dcp = cp + 1;
@ -1876,7 +1876,7 @@ x_zotc3(char **cp)
{
unsigned char c = **(unsigned char **)cp;
if (c == 0xC2 && Flag(FUTFHACK)) {
if (c == 0xC2 && UTFMODE) {
unsigned char c2 = ((unsigned char *)*cp)[1];
if (c2 >= 0x80 && c2 < 0xA0) {
@ -2911,7 +2911,7 @@ x_adjust(void)
*/
if ((xbp = xcp - (x_displen / 2)) < xbuf)
xbp = xbuf;
if (Flag(FUTFHACK))
if (UTFMODE)
while ((xbp > xbuf) && ((*xbp & 0xC0) == 0x80))
--xbp;
xlp_valid = false;
@ -2951,7 +2951,7 @@ x_e_putc2(int c)
if (c == '\r' || c == '\n')
x_col = 0;
if (x_col < xx_cols) {
if (Flag(FUTFHACK) && (c > 0x7F)) {
if (UTFMODE && (c > 0x7F)) {
char utf_tmp[3];
size_t x;
@ -2992,7 +2992,7 @@ x_e_putc3(const char **cp)
if (c == '\r' || c == '\n')
x_col = 0;
if (x_col < xx_cols) {
if (Flag(FUTFHACK) && (c > 0x7F)) {
if (UTFMODE && (c > 0x7F)) {
char *cp2;
width = utf_widthadj(*cp, (const char **)&cp2);

6
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.77 2008/12/02 12:39:37 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.78 2008/12/04 18:11:05 tg Exp $");
/*
* states while lexing word
@ -1164,7 +1164,7 @@ getsc__(void)
(((const unsigned char *)(s->str))[0] == 0xBB) &&
(((const unsigned char *)(s->str))[1] == 0xBF)) {
s->str += 2;
Flag(FUTFHACK) = 1;
UTFMODE = 1;
goto getsc_again;
}
}
@ -1332,7 +1332,7 @@ dopprompt(const char *cp, int ntruncate, int doprint)
columns--;
} else if (*cp == delimiter)
indelimit = !indelimit;
else if (Flag(FUTFHACK) && ((unsigned char)*cp > 0x7F)) {
else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
const char *cp2;
columns += utf_widthadj(cp, &cp2);
if (doprint && (indelimit ||

8
main.c
View File

@ -13,7 +13,7 @@
#include <locale.h>
#endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.114 2008/11/30 10:33:39 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.115 2008/12/04 18:11:06 tg Exp $");
extern char **environ;
@ -311,7 +311,7 @@ main(int argc, const char *argv[])
#define isuc(x) (((x) != NULL) && \
(stristr((x), "UTF-8") || stristr((x), "utf8")))
/* Check if we're in a UTF-8 locale */
if (!Flag(FUTFHACK)) {
if (!UTFMODE) {
const char *ccp;
#if HAVE_SETLOCALE_CTYPE
@ -328,11 +328,11 @@ main(int argc, const char *argv[])
ccp = getenv("LANG");
}
#endif
Flag(FUTFHACK) = isuc(ccp);
UTFMODE = isuc(ccp);
}
#undef isuc
#else
Flag(FUTFHACK) = 1;
UTFMODE = 1;
#endif
x_init();
}

4
misc.c
View File

@ -6,7 +6,7 @@
#include <grp.h>
#endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.91 2008/11/12 00:54:50 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.92 2008/12/04 18:11:06 tg Exp $");
#undef USE_CHVT
#if defined(TIOCSCTTY) && !defined(MKSH_SMALL)
@ -111,7 +111,7 @@ const struct shoption options[] = {
{ "restricted", 'r', OF_CMDLINE },
{ "stdin", 's', OF_CMDLINE }, /* pseudo non-standard */
{ "trackall", 'h', OF_ANY },
{ "utf8-hack", 'U', OF_ANY }, /* non-standard */
{ "utf8-mode", 'U', OF_ANY }, /* non-standard */
{ "verbose", 'v', OF_ANY },
#ifndef MKSH_NOVI
{ "vi", 0, OF_ANY },

12
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.144 2008/10/24 21:27:20 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.145 2008/12/04 18:11:07 tg Exp $
.\" $OpenBSD: ksh.1,v 1.122 2008/05/17 23:31:52 sobrado Exp $
.\"-
.\" Try to make GNU groff and AT&T nroff more compatible
@ -30,7 +30,7 @@
.el .xD \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8
..
.\"-
.Dd $Mdocdate: October 24 2008 $
.Dd $Mdocdate: December 4 2008 $
.Dt MKSH 1
.Os MirBSD
.Sh NAME
@ -2159,7 +2159,7 @@ As a special
.Nm mksh
extension, numbers to the base of one are treated as either (8-bit
transparent) ASCII or Unicode codepoints, depending on the shell's
.Ic utf8\-hack
.Ic utf8\-mode
flag (current setting).
Note that NUL bytes (integral value of zero) cannot be used.
In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
@ -3468,7 +3468,7 @@ the positional parameters (or to array
if
.Fl A
is used).
.It Fl U \*(Ba Ic utf8\-hack
.It Fl U \*(Ba Ic utf8\-mode
Enable UTF-8 support in the
.Sx Emacs editing mode
and internal string handling functions.
@ -3486,7 +3486,7 @@ and at least one of these returns something that matches
.Dq UTF\-8
or
.Dq utf8 ,
or if the input begins with a Byte Order Mark.
or if the input begins with a UTF-8 Byte Order Mark.
.It Fl u \*(Ba Ic nounset
Referencing of an unset parameter is treated as an error, unless one of the
.Ql \- ,
@ -5559,7 +5559,7 @@ and many other persons, and is currently maintained by
.An Thorsten Glaser Aq tg@mirbsd.de .
.Sh BUGS
This document attempts to describe
.Nm mksh\ R36
.Nm mksh\ R37
and up,
compiled without any options impacting functionality, such as
.Dv MKSH_SMALL ,

11
sh.h
View File

@ -103,9 +103,9 @@
#define __SCCSID(x) __IDSTRING(sccsid,x)
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.264 2008/12/02 13:20:39 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.265 2008/12/04 18:11:08 tg Exp $");
#endif
#define MKSH_VERSION "R36 2008/12/02"
#define MKSH_VERSION "R36 2008/12/04"
#ifndef MKSH_INCLUDES_ONLY
@ -341,7 +341,7 @@ char *ucstrstr(char *, const char *);
#define utf_ptradjx(src, dst) do { \
size_t utf_ptradjx_len; \
\
if (!Flag(FUTFHACK) || \
if (!UTFMODE || \
*(const unsigned char *)(src) < 0xC2 || \
(utf_ptradjx_len = utf_mbtowc(NULL, (src))) == (size_t)-1) \
utf_ptradjx_len = 1; \
@ -489,7 +489,7 @@ enum sh_flag {
FRESTRICTED, /* -r: restricted shell */
FSTDIN, /* -s: (invocation) parse stdin */
FTRACKALL, /* -h: create tracked aliases for all commands */
FUTFHACK, /* -U: utf-8 hack for command line editing */
FUTFMODE, /* -U: enable utf-8 processing */
FVERBOSE, /* -v: echo input */
#ifndef MKSH_NOVI
FVI, /* vi command editing */
@ -503,6 +503,7 @@ enum sh_flag {
};
#define Flag(f) (shell_flags[(int)(f)])
#define UTFMODE Flag(FUTFMODE)
EXTERN char shell_flags[FNFLAGS];
@ -1264,7 +1265,7 @@ void afree(void *, PArea); /* can take NULL */
void x_init(void);
int x_read(char *, size_t);
int x_bind(const char *, const char *, int, int);
/* UTF-8 hack stuff */
/* UTF-8 stuff */
size_t utf_mbtowc(unsigned int *, const char *);
size_t utf_wctomb(char *, unsigned int);
int utf_widthadj(const char *, const char **);

6
var.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.63 2008/11/30 10:33:40 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.64 2008/12/04 18:11:08 tg Exp $");
/*
* Variables
@ -303,7 +303,7 @@ str_val(struct tbl *vp)
*(s = strbuf) = '1';
s[1] = '#';
if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
s[2] = n & 0xFF;
else
sz = utf_wctomb(s + 2, n);
@ -455,7 +455,7 @@ getint(struct tbl *vp, long int *nump, bool arith)
if (base == 1) {
unsigned int wc;
if (!Flag(FUTFHACK))
if (!UTFMODE)
wc = *(unsigned char *)s;
else if (utf_mbtowc(&wc, s) == (size_t)-1)
wc = 0xEF00 + *(unsigned char *)s;