same as in commitid 1005B6CF36E3932B560, plus assorted cleanup
This commit is contained in:
parent
d5ce724471
commit
6cea111ef1
18
check.t
18
check.t
@ -1,4 +1,4 @@
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.807 2018/07/15 17:22:15 tg Exp $
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.808 2018/08/10 02:53:31 tg Exp $
|
||||
# -*- mode: sh -*-
|
||||
#-
|
||||
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||
@ -2487,7 +2487,7 @@ expected-stdout:
|
||||
name: glob-range-3
|
||||
description:
|
||||
Check that globbing matches the right things...
|
||||
# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
|
||||
# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
|
||||
# breaks on Cygwin 1.7 (files are now UTF-16 or something)
|
||||
# breaks on QNX 6.4.1 (says RT)
|
||||
category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
|
||||
@ -8512,7 +8512,7 @@ expected-stdout:
|
||||
---
|
||||
name: typeset-padding-3
|
||||
description:
|
||||
Check for a regression in which Unicode wasn’t left-padded right
|
||||
Check for a regression in which UTF-8 wasn’t left-padded right
|
||||
stdin:
|
||||
set -U
|
||||
nl=$'\n'
|
||||
@ -8535,7 +8535,7 @@ description:
|
||||
Check that the UTF-8 Byte Order Mark is ignored as the first
|
||||
multibyte character of the shell input (with -c, from standard
|
||||
input, as file, or as eval argument), but nowhere else
|
||||
# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
|
||||
# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
|
||||
category: !os:darwin,!shell:ebcdic-yes
|
||||
stdin:
|
||||
mkdir foo
|
||||
@ -10414,7 +10414,7 @@ expected-stdout:
|
||||
---
|
||||
name: integer-base-one-3Ws
|
||||
description:
|
||||
some sample code for hexdumping Unicode
|
||||
some sample code for hexdumping UCS-2
|
||||
not NUL safe; input lines must be NL terminated
|
||||
stdin:
|
||||
set -U
|
||||
@ -10582,7 +10582,7 @@ expected-stdout:
|
||||
---
|
||||
name: integer-base-one-3Wr
|
||||
description:
|
||||
some sample code for hexdumping Unicode; NUL and binary safe
|
||||
some sample code for hexdumping UCS-2; NUL and binary safe
|
||||
stdin:
|
||||
set -U
|
||||
{
|
||||
@ -10702,7 +10702,7 @@ expected-stdout:
|
||||
---
|
||||
name: integer-base-one-5A
|
||||
description:
|
||||
Check to see that we’re NUL and Unicode safe
|
||||
Check to see that we’re NUL and UCS safe
|
||||
category: !shell:ebcdic-yes
|
||||
stdin:
|
||||
set +U
|
||||
@ -10716,7 +10716,7 @@ expected-stdout:
|
||||
---
|
||||
name: integer-base-one-5E
|
||||
description:
|
||||
Check to see that we’re NUL and Unicode safe
|
||||
Check to see that we’re NUL and UCS safe
|
||||
category: !shell:ebcdic-no
|
||||
stdin:
|
||||
set +U
|
||||
@ -10730,7 +10730,7 @@ expected-stdout:
|
||||
---
|
||||
name: integer-base-one-5W
|
||||
description:
|
||||
Check to see that we’re NUL and Unicode safe
|
||||
Check to see that we’re NUL and UCS safe
|
||||
stdin:
|
||||
set -U
|
||||
print 'a\0b€c' >x
|
||||
|
6
expr.c
6
expr.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.104 2018/06/26 21:22:21 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.105 2018/08/10 02:53:33 tg Exp $");
|
||||
|
||||
#define EXPRTOK_DEFNS
|
||||
#include "exprtok.h"
|
||||
@ -885,7 +885,7 @@ static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
|
||||
unsigned int val) MKSH_A_PURE;
|
||||
|
||||
/*
|
||||
* Generated from the Unicode Character Database, Version 11.0.0, by
|
||||
* Generated from the UCD 11.0.0 by
|
||||
* MirOS: contrib/code/Snippets/eawparse,v 1.12 2017/09/06 16:05:45 tg Exp $
|
||||
*/
|
||||
|
||||
@ -1177,7 +1177,7 @@ mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems, unsigned int val)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Unix column width of a wide character (Unicode code point, really) */
|
||||
/* Unix column width of a wide character (UCS code point, really) */
|
||||
int
|
||||
utf_wcwidth(unsigned int wc)
|
||||
{
|
||||
|
4
funcs.c
4
funcs.c
@ -38,7 +38,7 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.353 2018/01/14 01:26:49 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.354 2018/08/10 02:53:34 tg Exp $");
|
||||
|
||||
#if HAVE_KILLPG
|
||||
/*
|
||||
@ -493,7 +493,7 @@ c_print(const char **wp)
|
||||
Xput(xs, xp, '\\');
|
||||
}
|
||||
} else if ((unsigned int)c > 0xFF) {
|
||||
/* generic function returned Unicode */
|
||||
/* generic function returned UCS */
|
||||
po.ts[utf_wctomb(po.ts, c - 0x100)] = 0;
|
||||
c = 0;
|
||||
do {
|
||||
|
10
misc.c
10
misc.c
@ -32,7 +32,7 @@
|
||||
#include <grp.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.292 2018/03/17 22:46:09 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.293 2018/08/10 02:53:35 tg Exp $");
|
||||
|
||||
#define KSH_CHVT_FLAG
|
||||
#ifdef MKSH_SMALL
|
||||
@ -2456,7 +2456,7 @@ getrusage(int what, struct rusage *ru)
|
||||
* and fp (put back a char) for backslash escapes,
|
||||
* assuming the first call to *fg gets the char di-
|
||||
* rectly after the backslash; return the character
|
||||
* (0..0xFF), Unicode (wc + 0x100), or -1 if no known
|
||||
* (0..0xFF), UCS (wc + 0x100), or -1 if no known
|
||||
* escape sequence was found
|
||||
*/
|
||||
int
|
||||
@ -2538,9 +2538,9 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
|
||||
/**
|
||||
* x: look for a hexadecimal number with up to
|
||||
* two (C style: arbitrary) digits; convert
|
||||
* to raw octet (C style: Unicode if >0xFF)
|
||||
* to raw octet (C style: UCS if >0xFF)
|
||||
* u/U: look for a hexadecimal number with up to
|
||||
* four (U: eight) digits; convert to Unicode
|
||||
* four (U: eight) digits; convert to UCS
|
||||
*/
|
||||
wc = 0;
|
||||
n = 0;
|
||||
@ -2562,7 +2562,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
|
||||
if (!n)
|
||||
goto unknown_escape;
|
||||
if ((cstyle && wc > 0xFF) || fc != 'x')
|
||||
/* Unicode marker */
|
||||
/* UCS marker */
|
||||
wc += 0x100;
|
||||
break;
|
||||
case '\'':
|
||||
|
17
mksh.1
17
mksh.1
@ -1,4 +1,4 @@
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.458 2018/07/15 17:21:22 tg Exp $
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.459 2018/08/10 02:53:36 tg Exp $
|
||||
.\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
|
||||
.\"-
|
||||
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
|
||||
@ -77,7 +77,7 @@
|
||||
.\" with -mandoc, it might implement .Mx itself, but we want to
|
||||
.\" use our own definition. And .Dd must come *first*, always.
|
||||
.\"
|
||||
.Dd $Mdocdate: July 15 2018 $
|
||||
.Dd $Mdocdate: August 10 2018 $
|
||||
.\"
|
||||
.\" Check which macro package we use, and do other -mdoc setup.
|
||||
.\"
|
||||
@ -1047,7 +1047,7 @@ and
|
||||
.Dq Li \eu#### ,
|
||||
.Dq #
|
||||
means a hexadecimal digit, of which there may be none up to four or eight;
|
||||
these escapes translate a Unicode codepoint to UTF-8.
|
||||
these escapes translate a Universal Coded Character Set codepoint to UTF-8.
|
||||
Furthermore,
|
||||
.Dq Li \eE
|
||||
and
|
||||
@ -1083,7 +1083,7 @@ and yield raw octets; hexadecimal sequences
|
||||
greedily eat up as many hexadecimal digits
|
||||
.Dq #
|
||||
as they can and terminate with the first non-hexadecimal digit;
|
||||
these translate a Unicode codepoint to UTF-8.
|
||||
these translate a Universal Coded Character Set codepoint to UTF-8.
|
||||
The sequence
|
||||
.Dq Li \ec# ,
|
||||
where
|
||||
@ -2652,7 +2652,8 @@ as required by the standard), as that's unsafe to do.
|
||||
As a special
|
||||
.Nm mksh
|
||||
extension, numbers to the base of one are treated as either (8-bit
|
||||
transparent) ASCII or Unicode codepoints, depending on the shell's
|
||||
transparent) ASCII or Universal Coded Character Set codepoints,
|
||||
depending on the shell's
|
||||
.Ic utf8\-mode
|
||||
flag (current setting).
|
||||
The
|
||||
@ -2665,7 +2666,7 @@ instead of
|
||||
is also supported.
|
||||
Note that NUL bytes (integral value of zero) cannot be used.
|
||||
An unset or empty parameter evaluates to 0 in integer context.
|
||||
In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
|
||||
In UTF-8 mode, raw octets are mapped into the range EF80..EFFF as in
|
||||
OPTU-8, which is in the PUA and has been assigned by CSUR for this use.
|
||||
If more than one octet in ASCII mode, or a sequence of more than one
|
||||
octet not forming a valid and minimal CESU-8 sequence is passed, the
|
||||
@ -6658,7 +6659,7 @@ locale.
|
||||
.Ic utf8\-mode
|
||||
.Em must
|
||||
be disabled in POSIX mode, and it
|
||||
only supports the Unicode BMP (Basic Multilingual Plane) and maps
|
||||
only supports the BMP (Basic Multilingual Plane) of UCS and maps
|
||||
raw octets into the U+EF80..U+EFFF wide character range; compare
|
||||
.Sx Arithmetic expressions .
|
||||
The following
|
||||
@ -6679,7 +6680,7 @@ case ${KSH_VERSION:\-} in
|
||||
esac ;;
|
||||
esac
|
||||
.Ed
|
||||
In near future, (Unicode) locale tracking will be implemented though.
|
||||
In near future, (UTF-8) locale tracking will be implemented though.
|
||||
.Pp
|
||||
See also the FAQ below.
|
||||
.Sh BUGS
|
||||
|
4
sh.h
4
sh.h
@ -182,7 +182,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.864 2018/07/15 17:21:23 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.865 2018/08/10 02:53:37 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R56 2018/07/15"
|
||||
|
||||
@ -783,7 +783,7 @@ enum sh_flag {
|
||||
};
|
||||
|
||||
#define Flag(f) (shell_flags[(int)(f)])
|
||||
#define UTFMODE Flag(FUNICODE)
|
||||
#define UTFMODE Flag(FUNNYCODE)
|
||||
|
||||
/*
|
||||
* parsing & execution environment
|
||||
|
@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
@SHFLAGS_DEFNS
|
||||
__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.5 2017/02/18 02:33:15 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.6 2018/08/10 02:53:39 tg Exp $");
|
||||
@SHFLAGS_ENUMS
|
||||
#define FN(sname,cname,flags,ochar) cname,
|
||||
#define F0(sname,cname,flags,ochar) cname = 0,
|
||||
@ -153,7 +153,7 @@ FN("trackall", FTRACKALL, OF_ANY
|
||||
|
||||
/* -U enable UTF-8 processing (non-standard) */
|
||||
>U|
|
||||
FN("utf8-mode", FUNICODE, OF_ANY
|
||||
FN("utf8-mode", FUNNYCODE, OF_ANY
|
||||
|
||||
/* -v echo input */
|
||||
>v|
|
||||
|
4
shf.c
4
shf.c
@ -27,7 +27,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.97 2018/01/14 01:28:16 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.98 2018/08/10 02:53:39 tg Exp $");
|
||||
|
||||
/* flags to shf_emptybuf() */
|
||||
#define EB_READSW 0x01 /* about to switch to reading */
|
||||
@ -1304,7 +1304,7 @@ ebcdic_init(void)
|
||||
* and the C1 control characters other than NEL are
|
||||
* hopeless, but we map EBCDIC NEL to ASCII LF so we
|
||||
* cannot even use C1 NEL.
|
||||
* If ever we map to Unicode, bump the table width to
|
||||
* If ever we map to UCS, bump the table width to
|
||||
* an unsigned int, and or the raw unconverted EBCDIC
|
||||
* values with 0x01000000 instead.
|
||||
*/
|
||||
|
4
tree.c
4
tree.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.95 2018/01/14 00:03:05 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.96 2018/08/10 02:53:39 tg Exp $");
|
||||
|
||||
#define INDENT 8
|
||||
|
||||
@ -808,7 +808,7 @@ vistree(char *dst, size_t sz, struct op *t)
|
||||
c = ksh_unctrl(c);
|
||||
} else if (UTFMODE && rtt2asc(c) > 0x7F) {
|
||||
/* better not try to display broken multibyte chars */
|
||||
/* also go easy on the Unicode: no U+FFFD here */
|
||||
/* also go easy on the UCS: no U+FFFD here */
|
||||
c = ORD('?');
|
||||
}
|
||||
*dst++ = c;
|
||||
|
Loading…
Reference in New Issue
Block a user