same as in commitid 1005B6CF36E3932B560, plus assorted cleanup
This commit is contained in:
parent
d5ce724471
commit
6cea111ef1
18
check.t
18
check.t
@ -1,4 +1,4 @@
|
|||||||
# $MirOS: src/bin/mksh/check.t,v 1.807 2018/07/15 17:22:15 tg Exp $
|
# $MirOS: src/bin/mksh/check.t,v 1.808 2018/08/10 02:53:31 tg Exp $
|
||||||
# -*- mode: sh -*-
|
# -*- mode: sh -*-
|
||||||
#-
|
#-
|
||||||
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||||
@ -2487,7 +2487,7 @@ expected-stdout:
|
|||||||
name: glob-range-3
|
name: glob-range-3
|
||||||
description:
|
description:
|
||||||
Check that globbing matches the right things...
|
Check that globbing matches the right things...
|
||||||
# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
|
# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
|
||||||
# breaks on Cygwin 1.7 (files are now UTF-16 or something)
|
# breaks on Cygwin 1.7 (files are now UTF-16 or something)
|
||||||
# breaks on QNX 6.4.1 (says RT)
|
# breaks on QNX 6.4.1 (says RT)
|
||||||
category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
|
category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
|
||||||
@ -8512,7 +8512,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: typeset-padding-3
|
name: typeset-padding-3
|
||||||
description:
|
description:
|
||||||
Check for a regression in which Unicode wasn’t left-padded right
|
Check for a regression in which UTF-8 wasn’t left-padded right
|
||||||
stdin:
|
stdin:
|
||||||
set -U
|
set -U
|
||||||
nl=$'\n'
|
nl=$'\n'
|
||||||
@ -8535,7 +8535,7 @@ description:
|
|||||||
Check that the UTF-8 Byte Order Mark is ignored as the first
|
Check that the UTF-8 Byte Order Mark is ignored as the first
|
||||||
multibyte character of the shell input (with -c, from standard
|
multibyte character of the shell input (with -c, from standard
|
||||||
input, as file, or as eval argument), but nowhere else
|
input, as file, or as eval argument), but nowhere else
|
||||||
# breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
|
# breaks on Mac OSX (HFS+ non-standard UTF-8 canonical decomposition)
|
||||||
category: !os:darwin,!shell:ebcdic-yes
|
category: !os:darwin,!shell:ebcdic-yes
|
||||||
stdin:
|
stdin:
|
||||||
mkdir foo
|
mkdir foo
|
||||||
@ -10414,7 +10414,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: integer-base-one-3Ws
|
name: integer-base-one-3Ws
|
||||||
description:
|
description:
|
||||||
some sample code for hexdumping Unicode
|
some sample code for hexdumping UCS-2
|
||||||
not NUL safe; input lines must be NL terminated
|
not NUL safe; input lines must be NL terminated
|
||||||
stdin:
|
stdin:
|
||||||
set -U
|
set -U
|
||||||
@ -10582,7 +10582,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: integer-base-one-3Wr
|
name: integer-base-one-3Wr
|
||||||
description:
|
description:
|
||||||
some sample code for hexdumping Unicode; NUL and binary safe
|
some sample code for hexdumping UCS-2; NUL and binary safe
|
||||||
stdin:
|
stdin:
|
||||||
set -U
|
set -U
|
||||||
{
|
{
|
||||||
@ -10702,7 +10702,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: integer-base-one-5A
|
name: integer-base-one-5A
|
||||||
description:
|
description:
|
||||||
Check to see that we’re NUL and Unicode safe
|
Check to see that we’re NUL and UCS safe
|
||||||
category: !shell:ebcdic-yes
|
category: !shell:ebcdic-yes
|
||||||
stdin:
|
stdin:
|
||||||
set +U
|
set +U
|
||||||
@ -10716,7 +10716,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: integer-base-one-5E
|
name: integer-base-one-5E
|
||||||
description:
|
description:
|
||||||
Check to see that we’re NUL and Unicode safe
|
Check to see that we’re NUL and UCS safe
|
||||||
category: !shell:ebcdic-no
|
category: !shell:ebcdic-no
|
||||||
stdin:
|
stdin:
|
||||||
set +U
|
set +U
|
||||||
@ -10730,7 +10730,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: integer-base-one-5W
|
name: integer-base-one-5W
|
||||||
description:
|
description:
|
||||||
Check to see that we’re NUL and Unicode safe
|
Check to see that we’re NUL and UCS safe
|
||||||
stdin:
|
stdin:
|
||||||
set -U
|
set -U
|
||||||
print 'a\0b€c' >x
|
print 'a\0b€c' >x
|
||||||
|
6
expr.c
6
expr.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.104 2018/06/26 21:22:21 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.105 2018/08/10 02:53:33 tg Exp $");
|
||||||
|
|
||||||
#define EXPRTOK_DEFNS
|
#define EXPRTOK_DEFNS
|
||||||
#include "exprtok.h"
|
#include "exprtok.h"
|
||||||
@ -885,7 +885,7 @@ static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
|
|||||||
unsigned int val) MKSH_A_PURE;
|
unsigned int val) MKSH_A_PURE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generated from the Unicode Character Database, Version 11.0.0, by
|
* Generated from the UCD 11.0.0 by
|
||||||
* MirOS: contrib/code/Snippets/eawparse,v 1.12 2017/09/06 16:05:45 tg Exp $
|
* MirOS: contrib/code/Snippets/eawparse,v 1.12 2017/09/06 16:05:45 tg Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -1177,7 +1177,7 @@ mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems, unsigned int val)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Unix column width of a wide character (Unicode code point, really) */
|
/* Unix column width of a wide character (UCS code point, really) */
|
||||||
int
|
int
|
||||||
utf_wcwidth(unsigned int wc)
|
utf_wcwidth(unsigned int wc)
|
||||||
{
|
{
|
||||||
|
4
funcs.c
4
funcs.c
@ -38,7 +38,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.353 2018/01/14 01:26:49 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.354 2018/08/10 02:53:34 tg Exp $");
|
||||||
|
|
||||||
#if HAVE_KILLPG
|
#if HAVE_KILLPG
|
||||||
/*
|
/*
|
||||||
@ -493,7 +493,7 @@ c_print(const char **wp)
|
|||||||
Xput(xs, xp, '\\');
|
Xput(xs, xp, '\\');
|
||||||
}
|
}
|
||||||
} else if ((unsigned int)c > 0xFF) {
|
} else if ((unsigned int)c > 0xFF) {
|
||||||
/* generic function returned Unicode */
|
/* generic function returned UCS */
|
||||||
po.ts[utf_wctomb(po.ts, c - 0x100)] = 0;
|
po.ts[utf_wctomb(po.ts, c - 0x100)] = 0;
|
||||||
c = 0;
|
c = 0;
|
||||||
do {
|
do {
|
||||||
|
10
misc.c
10
misc.c
@ -32,7 +32,7 @@
|
|||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.292 2018/03/17 22:46:09 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.293 2018/08/10 02:53:35 tg Exp $");
|
||||||
|
|
||||||
#define KSH_CHVT_FLAG
|
#define KSH_CHVT_FLAG
|
||||||
#ifdef MKSH_SMALL
|
#ifdef MKSH_SMALL
|
||||||
@ -2456,7 +2456,7 @@ getrusage(int what, struct rusage *ru)
|
|||||||
* and fp (put back a char) for backslash escapes,
|
* and fp (put back a char) for backslash escapes,
|
||||||
* assuming the first call to *fg gets the char di-
|
* assuming the first call to *fg gets the char di-
|
||||||
* rectly after the backslash; return the character
|
* rectly after the backslash; return the character
|
||||||
* (0..0xFF), Unicode (wc + 0x100), or -1 if no known
|
* (0..0xFF), UCS (wc + 0x100), or -1 if no known
|
||||||
* escape sequence was found
|
* escape sequence was found
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
@ -2538,9 +2538,9 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
|
|||||||
/**
|
/**
|
||||||
* x: look for a hexadecimal number with up to
|
* x: look for a hexadecimal number with up to
|
||||||
* two (C style: arbitrary) digits; convert
|
* two (C style: arbitrary) digits; convert
|
||||||
* to raw octet (C style: Unicode if >0xFF)
|
* to raw octet (C style: UCS if >0xFF)
|
||||||
* u/U: look for a hexadecimal number with up to
|
* u/U: look for a hexadecimal number with up to
|
||||||
* four (U: eight) digits; convert to Unicode
|
* four (U: eight) digits; convert to UCS
|
||||||
*/
|
*/
|
||||||
wc = 0;
|
wc = 0;
|
||||||
n = 0;
|
n = 0;
|
||||||
@ -2562,7 +2562,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
|
|||||||
if (!n)
|
if (!n)
|
||||||
goto unknown_escape;
|
goto unknown_escape;
|
||||||
if ((cstyle && wc > 0xFF) || fc != 'x')
|
if ((cstyle && wc > 0xFF) || fc != 'x')
|
||||||
/* Unicode marker */
|
/* UCS marker */
|
||||||
wc += 0x100;
|
wc += 0x100;
|
||||||
break;
|
break;
|
||||||
case '\'':
|
case '\'':
|
||||||
|
17
mksh.1
17
mksh.1
@ -1,4 +1,4 @@
|
|||||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.458 2018/07/15 17:21:22 tg Exp $
|
.\" $MirOS: src/bin/mksh/mksh.1,v 1.459 2018/08/10 02:53:36 tg Exp $
|
||||||
.\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
|
.\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
|
||||||
.\"-
|
.\"-
|
||||||
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
|
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
|
||||||
@ -77,7 +77,7 @@
|
|||||||
.\" with -mandoc, it might implement .Mx itself, but we want to
|
.\" with -mandoc, it might implement .Mx itself, but we want to
|
||||||
.\" use our own definition. And .Dd must come *first*, always.
|
.\" use our own definition. And .Dd must come *first*, always.
|
||||||
.\"
|
.\"
|
||||||
.Dd $Mdocdate: July 15 2018 $
|
.Dd $Mdocdate: August 10 2018 $
|
||||||
.\"
|
.\"
|
||||||
.\" Check which macro package we use, and do other -mdoc setup.
|
.\" Check which macro package we use, and do other -mdoc setup.
|
||||||
.\"
|
.\"
|
||||||
@ -1047,7 +1047,7 @@ and
|
|||||||
.Dq Li \eu#### ,
|
.Dq Li \eu#### ,
|
||||||
.Dq #
|
.Dq #
|
||||||
means a hexadecimal digit, of which there may be none up to four or eight;
|
means a hexadecimal digit, of which there may be none up to four or eight;
|
||||||
these escapes translate a Unicode codepoint to UTF-8.
|
these escapes translate a Universal Coded Character Set codepoint to UTF-8.
|
||||||
Furthermore,
|
Furthermore,
|
||||||
.Dq Li \eE
|
.Dq Li \eE
|
||||||
and
|
and
|
||||||
@ -1083,7 +1083,7 @@ and yield raw octets; hexadecimal sequences
|
|||||||
greedily eat up as many hexadecimal digits
|
greedily eat up as many hexadecimal digits
|
||||||
.Dq #
|
.Dq #
|
||||||
as they can and terminate with the first non-hexadecimal digit;
|
as they can and terminate with the first non-hexadecimal digit;
|
||||||
these translate a Unicode codepoint to UTF-8.
|
these translate a Universal Coded Character Set codepoint to UTF-8.
|
||||||
The sequence
|
The sequence
|
||||||
.Dq Li \ec# ,
|
.Dq Li \ec# ,
|
||||||
where
|
where
|
||||||
@ -2652,7 +2652,8 @@ as required by the standard), as that's unsafe to do.
|
|||||||
As a special
|
As a special
|
||||||
.Nm mksh
|
.Nm mksh
|
||||||
extension, numbers to the base of one are treated as either (8-bit
|
extension, numbers to the base of one are treated as either (8-bit
|
||||||
transparent) ASCII or Unicode codepoints, depending on the shell's
|
transparent) ASCII or Universal Coded Character Set codepoints,
|
||||||
|
depending on the shell's
|
||||||
.Ic utf8\-mode
|
.Ic utf8\-mode
|
||||||
flag (current setting).
|
flag (current setting).
|
||||||
The
|
The
|
||||||
@ -2665,7 +2666,7 @@ instead of
|
|||||||
is also supported.
|
is also supported.
|
||||||
Note that NUL bytes (integral value of zero) cannot be used.
|
Note that NUL bytes (integral value of zero) cannot be used.
|
||||||
An unset or empty parameter evaluates to 0 in integer context.
|
An unset or empty parameter evaluates to 0 in integer context.
|
||||||
In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
|
In UTF-8 mode, raw octets are mapped into the range EF80..EFFF as in
|
||||||
OPTU-8, which is in the PUA and has been assigned by CSUR for this use.
|
OPTU-8, which is in the PUA and has been assigned by CSUR for this use.
|
||||||
If more than one octet in ASCII mode, or a sequence of more than one
|
If more than one octet in ASCII mode, or a sequence of more than one
|
||||||
octet not forming a valid and minimal CESU-8 sequence is passed, the
|
octet not forming a valid and minimal CESU-8 sequence is passed, the
|
||||||
@ -6658,7 +6659,7 @@ locale.
|
|||||||
.Ic utf8\-mode
|
.Ic utf8\-mode
|
||||||
.Em must
|
.Em must
|
||||||
be disabled in POSIX mode, and it
|
be disabled in POSIX mode, and it
|
||||||
only supports the Unicode BMP (Basic Multilingual Plane) and maps
|
only supports the BMP (Basic Multilingual Plane) of UCS and maps
|
||||||
raw octets into the U+EF80..U+EFFF wide character range; compare
|
raw octets into the U+EF80..U+EFFF wide character range; compare
|
||||||
.Sx Arithmetic expressions .
|
.Sx Arithmetic expressions .
|
||||||
The following
|
The following
|
||||||
@ -6679,7 +6680,7 @@ case ${KSH_VERSION:\-} in
|
|||||||
esac ;;
|
esac ;;
|
||||||
esac
|
esac
|
||||||
.Ed
|
.Ed
|
||||||
In near future, (Unicode) locale tracking will be implemented though.
|
In near future, (UTF-8) locale tracking will be implemented though.
|
||||||
.Pp
|
.Pp
|
||||||
See also the FAQ below.
|
See also the FAQ below.
|
||||||
.Sh BUGS
|
.Sh BUGS
|
||||||
|
4
sh.h
4
sh.h
@ -182,7 +182,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EXTERN
|
#ifdef EXTERN
|
||||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.864 2018/07/15 17:21:23 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.865 2018/08/10 02:53:37 tg Exp $");
|
||||||
#endif
|
#endif
|
||||||
#define MKSH_VERSION "R56 2018/07/15"
|
#define MKSH_VERSION "R56 2018/07/15"
|
||||||
|
|
||||||
@ -783,7 +783,7 @@ enum sh_flag {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#define Flag(f) (shell_flags[(int)(f)])
|
#define Flag(f) (shell_flags[(int)(f)])
|
||||||
#define UTFMODE Flag(FUNICODE)
|
#define UTFMODE Flag(FUNNYCODE)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* parsing & execution environment
|
* parsing & execution environment
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
@SHFLAGS_DEFNS
|
@SHFLAGS_DEFNS
|
||||||
__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.5 2017/02/18 02:33:15 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/sh_flags.opt,v 1.6 2018/08/10 02:53:39 tg Exp $");
|
||||||
@SHFLAGS_ENUMS
|
@SHFLAGS_ENUMS
|
||||||
#define FN(sname,cname,flags,ochar) cname,
|
#define FN(sname,cname,flags,ochar) cname,
|
||||||
#define F0(sname,cname,flags,ochar) cname = 0,
|
#define F0(sname,cname,flags,ochar) cname = 0,
|
||||||
@ -153,7 +153,7 @@ FN("trackall", FTRACKALL, OF_ANY
|
|||||||
|
|
||||||
/* -U enable UTF-8 processing (non-standard) */
|
/* -U enable UTF-8 processing (non-standard) */
|
||||||
>U|
|
>U|
|
||||||
FN("utf8-mode", FUNICODE, OF_ANY
|
FN("utf8-mode", FUNNYCODE, OF_ANY
|
||||||
|
|
||||||
/* -v echo input */
|
/* -v echo input */
|
||||||
>v|
|
>v|
|
||||||
|
4
shf.c
4
shf.c
@ -27,7 +27,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.97 2018/01/14 01:28:16 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.98 2018/08/10 02:53:39 tg Exp $");
|
||||||
|
|
||||||
/* flags to shf_emptybuf() */
|
/* flags to shf_emptybuf() */
|
||||||
#define EB_READSW 0x01 /* about to switch to reading */
|
#define EB_READSW 0x01 /* about to switch to reading */
|
||||||
@ -1304,7 +1304,7 @@ ebcdic_init(void)
|
|||||||
* and the C1 control characters other than NEL are
|
* and the C1 control characters other than NEL are
|
||||||
* hopeless, but we map EBCDIC NEL to ASCII LF so we
|
* hopeless, but we map EBCDIC NEL to ASCII LF so we
|
||||||
* cannot even use C1 NEL.
|
* cannot even use C1 NEL.
|
||||||
* If ever we map to Unicode, bump the table width to
|
* If ever we map to UCS, bump the table width to
|
||||||
* an unsigned int, and or the raw unconverted EBCDIC
|
* an unsigned int, and or the raw unconverted EBCDIC
|
||||||
* values with 0x01000000 instead.
|
* values with 0x01000000 instead.
|
||||||
*/
|
*/
|
||||||
|
4
tree.c
4
tree.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.95 2018/01/14 00:03:05 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.96 2018/08/10 02:53:39 tg Exp $");
|
||||||
|
|
||||||
#define INDENT 8
|
#define INDENT 8
|
||||||
|
|
||||||
@ -808,7 +808,7 @@ vistree(char *dst, size_t sz, struct op *t)
|
|||||||
c = ksh_unctrl(c);
|
c = ksh_unctrl(c);
|
||||||
} else if (UTFMODE && rtt2asc(c) > 0x7F) {
|
} else if (UTFMODE && rtt2asc(c) > 0x7F) {
|
||||||
/* better not try to display broken multibyte chars */
|
/* better not try to display broken multibyte chars */
|
||||||
/* also go easy on the Unicode: no U+FFFD here */
|
/* also go easy on the UCS: no U+FFFD here */
|
||||||
c = ORD('?');
|
c = ORD('?');
|
||||||
}
|
}
|
||||||
*dst++ = c;
|
*dst++ = c;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user