Support Dave Korn’s alternative 「'a'」 (or 「'…'」) form for base-one integers
in addition to my 「1#a」 (or 「1#…」) form; the quoted form also allows for
finer end-of-character checking. Note that the result is locale-dependent in
ksh93, whereas in mksh it depends on the set ±U setting, and mksh’s OPTU-16
encoding is used.
This commit is contained in:
tg 2009-09-06 17:55:55 +00:00
parent 9531e12b36
commit 9dd98da40d
4 changed files with 48 additions and 6 deletions

24
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.303 2009/09/06 17:42:11 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.304 2009/09/06 17:55:53 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R39 2009/08/30
@(#)MIRBSD KSH R39 2009/09/06
description:
Check version of shell.
stdin:
@ -5520,6 +5520,26 @@ expected-stdout:
00000120 000A EFE0 EF80 EF80 - 000A FFFD EFEF EFBF |.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>|
00000128 EFBE EFEF EFBF EFBF - 000A |<EFBFBD><EFBFBD><EFBFBD><EFBFBD>.|
---
name: integer-base-one-4
description:
Check if ksh93-style base-one integers work
stdin:
set -U
print 1 $(('a'))
(print 2f $(('aa'))) 2>&1 | sed "s/^[^']*'/2p '/"
print 3 $(('…'))
x="'a'"
print "4 <$x>"
print 5 $(($x))
print 6 $((x))
expected-stdout:
1 97
2p 'aa': multi-character character constant
3 8230
4 <'a'>
5 97
6 97
---
name: ulimit-1
description:
Check if we can use a specific syntax idiom for ulimit

16
expr.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.33 2009/08/28 21:01:25 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.34 2009/09/06 17:55:54 tg Exp $");
/* The order of these enums is constrained by the order of opinfo[] */
enum token {
@ -528,6 +528,20 @@ exprtoken(Expr_state *es)
cp += utf_ptradj(cp);
strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
goto process_tvar;
} else if (c == '\'') {
++cp;
cp += utf_ptradj(cp);
if (*cp++ != '\'')
evalerr(es, ET_STR,
"multi-character character constant");
/* 'x' -> 1#x (x = one multibyte character) */
c = cp - es->tokp;
tvar = alloc(c + /* NUL */ 1, ATEMP);
tvar[0] = '1';
tvar[1] = '#';
memcpy(tvar + 2, es->tokp + 1, c - 2);
tvar[c] = '\0';
goto process_tvar;
} else if (ksh_isdigit(c)) {
while (c != '_' && (ksh_isalnux(c) || c == '#'))
c = *cp++;

10
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.181 2009/09/06 17:42:13 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.182 2009/09/06 17:55:54 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -2230,6 +2230,14 @@ extension, numbers to the base of one are treated as either (8-bit
transparent) ASCII or Unicode codepoints, depending on the shell's
.Ic utf8\-mode
flag (current setting).
The
.At
.Nm ksh93
syntax of
.Dq \*(aqx\*(aq
instead of
.Dq 1#x
is also supported.
Note that NUL bytes (integral value of zero) cannot be used.
In Unicode mode, raw octets are mapped into the range EF80..EFFF as in
OPTU-8, which is in the PUA and has been assigned by CSUR for this use.

4
sh.h
View File

@ -134,9 +134,9 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.338 2009/09/06 17:42:14 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.339 2009/09/06 17:55:55 tg Exp $");
#endif
#define MKSH_VERSION "R39 2009/08/30"
#define MKSH_VERSION "R39 2009/09/06"
#ifndef MKSH_INCLUDES_ONLY