Support Dave Korn’s alternative 「'a'」 (or 「'…'」) form for base-one
integers in addition to my 「1#a」 (or 「1#…」) form; the quoted form also
allows for finer end-of-character checking. Note that the result is
locale-dependent in ksh93, but in mksh it depends on the set ±U
(utf8-mode) setting, and mksh’s OPTU-16 encoding is used.
This commit is contained in:
tg 2009-09-06 17:55:55 +00:00
parent 9531e12b36
commit 9dd98da40d
4 changed files with 48 additions and 6 deletions

24
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.303 2009/09/06 17:42:11 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.304 2009/09/06 17:55:53 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh # http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R39 2009/08/30 @(#)MIRBSD KSH R39 2009/09/06
description: description:
Check version of shell. Check version of shell.
stdin: stdin:
@ -5520,6 +5520,26 @@ expected-stdout:
00000120 000A EFE0 EF80 EF80 - 000A FFFD EFEF EFBF |.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>| 00000120 000A EFE0 EF80 EF80 - 000A FFFD EFEF EFBF |.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>|
00000128 EFBE EFEF EFBF EFBF - 000A |<EFBFBD><EFBFBD><EFBFBD><EFBFBD>.| 00000128 EFBE EFEF EFBF EFBF - 000A |<EFBFBD><EFBFBD><EFBFBD><EFBFBD>.|
--- ---
name: integer-base-one-4
description:
Check if ksh93-style base-one integers work
stdin:
set -U
print 1 $(('a'))
(print 2f $(('aa'))) 2>&1 | sed "s/^[^']*'/2p '/"
print 3 $(('…'))
x="'a'"
print "4 <$x>"
print 5 $(($x))
print 6 $((x))
expected-stdout:
1 97
2p 'aa': multi-character character constant
3 8230
4 <'a'>
5 97
6 97
---
name: ulimit-1 name: ulimit-1
description: description:
Check if we can use a specific syntax idiom for ulimit Check if we can use a specific syntax idiom for ulimit

16
expr.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.33 2009/08/28 21:01:25 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.34 2009/09/06 17:55:54 tg Exp $");
/* The order of these enums is constrained by the order of opinfo[] */ /* The order of these enums is constrained by the order of opinfo[] */
enum token { enum token {
@ -528,6 +528,20 @@ exprtoken(Expr_state *es)
cp += utf_ptradj(cp); cp += utf_ptradj(cp);
strndupx(tvar, es->tokp, cp - es->tokp, ATEMP); strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
goto process_tvar; goto process_tvar;
} else if (c == '\'') {
++cp;
cp += utf_ptradj(cp);
if (*cp++ != '\'')
evalerr(es, ET_STR,
"multi-character character constant");
/* 'x' -> 1#x (x = one multibyte character) */
c = cp - es->tokp;
tvar = alloc(c + /* NUL */ 1, ATEMP);
tvar[0] = '1';
tvar[1] = '#';
memcpy(tvar + 2, es->tokp + 1, c - 2);
tvar[c] = '\0';
goto process_tvar;
} else if (ksh_isdigit(c)) { } else if (ksh_isdigit(c)) {
while (c != '_' && (ksh_isalnux(c) || c == '#')) while (c != '_' && (ksh_isalnux(c) || c == '#'))
c = *cp++; c = *cp++;

10
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.181 2009/09/06 17:42:13 tg Exp $ .\" $MirOS: src/bin/mksh/mksh.1,v 1.182 2009/09/06 17:55:54 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $ .\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"- .\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -2230,6 +2230,14 @@ extension, numbers to the base of one are treated as either (8-bit
transparent) ASCII or Unicode codepoints, depending on the shell's transparent) ASCII or Unicode codepoints, depending on the shell's
.Ic utf8\-mode .Ic utf8\-mode
flag (current setting). flag (current setting).
The
.At
.Nm ksh93
syntax of
.Dq \*(aqx\*(aq
instead of
.Dq 1#x
is also supported.
Note that NUL bytes (integral value of zero) cannot be used. Note that NUL bytes (integral value of zero) cannot be used.
In Unicode mode, raw octets are mapped into the range EF80..EFFF as in In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
OPTU-8, which is in the PUA and has been assigned by CSUR for this use. OPTU-8, which is in the PUA and has been assigned by CSUR for this use.

4
sh.h
View File

@ -134,9 +134,9 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.338 2009/09/06 17:42:14 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.339 2009/09/06 17:55:55 tg Exp $");
#endif #endif
#define MKSH_VERSION "R39 2009/08/30" #define MKSH_VERSION "R39 2009/09/06"
#ifndef MKSH_INCLUDES_ONLY #ifndef MKSH_INCLUDES_ONLY