small character classes overhaul:

• make fast character classes even faster by removing the C_SUBOP2 hack
  in favour of a separate, seldom-used ksh_issubop2 macro, which remains
  a slower class (no change there); this also makes ctype() side-effect-safe
• optimise cases of ksh_isalphx followed by a ksh_isalnux loop
  (used when parsing variable names)
• remove a misleading comment in initctypes() about \0 from pdksh
• rename C_ALPHA to C_ALPHX to make it clearer that the underscore is included
• sprinkle a few ord() in there
• add new ksh_isalpha() which tests for [A-Za-z] (slow character class)
• there is no '_:\' drive on OS/2 (which inspired the whole changeset)
This commit is contained in:
tg 2017-03-26 00:10:26 +00:00
parent 034d0c0269
commit 7529e350cf
7 changed files with 33 additions and 33 deletions

View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.765 2017/03/22 00:20:39 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.766 2017/03/26 00:10:21 tg Exp $
# -*- mode: sh -*-
#-
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@ -30,7 +30,7 @@
# (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date
expected-stdout:
@(#)MIRBSD KSH R54 2017/03/21
@(#)MIRBSD KSH R54 2017/03/25
description:
Check version of shell.
stdin:
@ -39,7 +39,7 @@ name: KSH_VERSION
category: shell:legacy-no
---
expected-stdout:
@(#)LEGACY KSH R54 2017/03/21
@(#)LEGACY KSH R54 2017/03/25
description:
Check version of legacy shell.
stdin:

8
eval.c
View File

@ -23,7 +23,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.198 2017/03/11 22:49:55 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.199 2017/03/26 00:10:23 tg Exp $");
/*
* string expansion
@ -1196,7 +1196,7 @@ varsub(Expand *xp, const char *sp, const char *word,
} else if (ctype(c, C_SUBOP1)) {
slen += 2;
stype |= c;
} else if (ctype(c, C_SUBOP2)) {
} else if (ksh_issubop2(c)) {
/* Note: ksh88 allows :%, :%%, etc */
slen += 2;
stype = c;
@ -1304,7 +1304,7 @@ varsub(Expand *xp, const char *sp, const char *word,
c = stype & 0x7F;
/* test the compiler's code generator */
if (((stype < 0x100) && (ctype(c, C_SUBOP2) ||
if (((stype < 0x100) && (ksh_issubop2(c) ||
(((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
(state != XARG || (ifs0 || xp->split ?
(xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
@ -1314,7 +1314,7 @@ varsub(Expand *xp, const char *sp, const char *word,
/* expand word instead of variable value */
state = XBASE;
if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
(ctype(c, C_SUBOP2) || (state != XBASE && c != '+')))
(ksh_issubop2(c) || (state != XBASE && c != '+')))
errorf(Tf_parm, sp);
*stypep = stype;
*slenp = slen;

9
expr.c
View File

@ -2,7 +2,7 @@
/*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
* 2011, 2012, 2013, 2014, 2016
* 2011, 2012, 2013, 2014, 2016, 2017
* mirabilos <m@mirbsd.org>
*
* Provided that these terms and disclaimer and all copyright notices
@ -23,7 +23,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.90 2016/11/07 16:58:48 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.91 2017/03/26 00:10:23 tg Exp $");
#define EXPRTOK_DEFNS
#include "exprtok.h"
@ -572,8 +572,9 @@ exprtoken(Expr_state *es)
if (c == '\0')
es->tok = END;
else if (ksh_isalphx(c)) {
for (; ksh_isalnux(c); c = *cp)
cp++;
do {
c = *++cp;
} while (ksh_isalnux(c));
if (c == '[') {
size_t len;

4
lex.c
View File

@ -23,7 +23,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.231 2017/03/22 00:20:43 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.232 2017/03/26 00:10:24 tg Exp $");
/*
* states while lexing word
@ -489,7 +489,7 @@ yylex(int cf)
* If this is a trim operation,
* treat (,|,) specially in STBRACE.
*/
if (ctype(c, C_SUBOP2)) {
if (ksh_issubop2(c)) {
ungetsc(c);
if (Flag(FSH))
PUSH_STATE(STBRACEBOURNE);

9
misc.c
View File

@ -30,7 +30,7 @@
#include <grp.h>
#endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.252 2017/03/11 23:56:17 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.253 2017/03/26 00:10:24 tg Exp $");
#define KSH_CHVT_FLAG
#ifdef MKSH_SMALL
@ -89,11 +89,10 @@ setctypes(const char *s, int t)
void
initctypes(void)
{
setctypes(letters_uc, C_ALPHA);
setctypes(letters_lc, C_ALPHA);
chtypes['_'] |= C_ALPHA;
setctypes(letters_uc, C_ALPHX);
setctypes(letters_lc, C_ALPHX);
chtypes['_'] |= C_ALPHX;
setctypes("0123456789", C_DIGIT);
/* \0 added automatically */
setctypes(TC_LEX1, C_LEX1);
setctypes("*@#!$-?", C_VAR1);
setctypes(TC_IFSWS, C_IFSWS);

21
sh.h
View File

@ -175,9 +175,9 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.797 2017/03/22 00:20:53 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.798 2017/03/26 00:10:25 tg Exp $");
#endif
#define MKSH_VERSION "R54 2017/03/21"
#define MKSH_VERSION "R54 2017/03/25"
/* arithmetic types: C implementation */
#if !HAVE_CAN_INTTYPES
@ -1243,7 +1243,7 @@ EXTERN bool really_exit;
/*
* fast character classes
*/
#define C_ALPHA BIT(0) /* a-z_A-Z */
#define C_ALPHX BIT(0) /* A-Za-z_ */
#define C_DIGIT BIT(1) /* 0-9 */
#define C_LEX1 BIT(2) /* \t \n\0|&;<>() */
#define C_VAR1 BIT(3) /* *@#!$-? */
@ -1251,17 +1251,16 @@ EXTERN bool really_exit;
#define C_SUBOP1 BIT(5) /* "=-+?" */
#define C_QUOTE BIT(6) /* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */
#define C_IFS BIT(7) /* $IFS */
#define C_SUBOP2 BIT(8) /* "#%" (magic, see below) */
extern unsigned char chtypes[];
#define ctype(c, t) tobool( ((t) == C_SUBOP2) ? \
(((c) == '#' || (c) == '%') ? 1 : 0) : \
(chtypes[(unsigned char)(c)] & (t)) )
#define ctype(c, t) tobool(chtypes[(unsigned char)(c)] & (t))
#define ord(c) ((int)(unsigned char)(c))
#define ksh_isalphx(c) ctype((c), C_ALPHA)
#define ksh_isalnux(c) ctype((c), C_ALPHA | C_DIGIT)
#define ksh_isdigit(c) (((c) >= '0') && ((c) <= '9'))
#define ksh_issubop2(c) tobool((c) == ord('#') || (c) == ord('%'))
#define ksh_isalpha(c) (ctype((c), C_ALPHX) && (c) != ord('_'))
#define ksh_isalphx(c) ctype((c), C_ALPHX)
#define ksh_isalnux(c) ctype((c), C_ALPHX | C_DIGIT)
#define ksh_isdigit(c) ctype((c), C_DIGIT)
#define ksh_islower(c) (((c) >= 'a') && ((c) <= 'z'))
#define ksh_isupper(c) (((c) >= 'A') && ((c) <= 'Z'))
#define ksh_tolower(c) (ksh_isupper(c) ? (c) - 'A' + 'a' : (c))
@ -2424,7 +2423,7 @@ extern int tty_init_fd(void); /* initialise tty_fd, tty_devtty */
#define mksh_abspath(s) __extension__({ \
const char *mksh_abspath_s = (s); \
(mksh_cdirsep(mksh_abspath_s[0]) || \
(ksh_isalphx(mksh_abspath_s[0]) && \
(ksh_isalpha(mksh_abspath_s[0]) && \
mksh_abspath_s[1] == ':')); \
})
#define mksh_cdirsep(c) __extension__({ \

9
var.c
View File

@ -2,7 +2,7 @@
/*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
* 2011, 2012, 2013, 2014, 2015, 2016
* 2011, 2012, 2013, 2014, 2015, 2016, 2017
* mirabilos <m@mirbsd.org>
*
* Provided that these terms and disclaimer and all copyright notices
@ -28,7 +28,7 @@
#include <sys/sysctl.h>
#endif
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.209 2016/11/11 23:31:39 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.210 2017/03/26 00:10:26 tg Exp $");
/*-
* Variables
@ -1053,8 +1053,9 @@ skip_varname(const char *s, bool aok)
size_t alen;
if (s && ksh_isalphx(*s)) {
while (*++s && ksh_isalnux(*s))
;
do {
++s;
} while (ksh_isalnux(*s));
if (aok && *s == '[' && (alen = array_ref_len(s)))
s += alen;
}