small character classes overhaul:

• make fast character classes even faster by removing the C_SUBOP2 hack
  in favour of a separate, seldom-used ksh_issubop2 macro; this also makes
  ctype() side-effect-safe (ksh_issubop2 remains a slow class, as before)
• optimise cases of ksh_isalphx followed by a ksh_isalnux loop
  (used when parsing variable names)
• remove a misleading comment in initctypes() about \0 from pdksh
• rename C_ALPHA to C_ALPHX to make it clearer that the underscore is included
• sprinkle a few ord() in there
• add new ksh_isalpha() which tests for [A-Za-z] (slow character class)
• there is no '_:\' drive on OS/2 (which inspired the whole changeset)
This commit is contained in:
tg
2017-03-26 00:10:26 +00:00
parent 034d0c0269
commit 7529e350cf
7 changed files with 33 additions and 33 deletions

View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.765 2017/03/22 00:20:39 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.766 2017/03/26 00:10:21 tg Exp $
# -*- mode: sh -*- # -*- mode: sh -*-
#- #-
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@ -30,7 +30,7 @@
# (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R54 2017/03/21 @(#)MIRBSD KSH R54 2017/03/25
description: description:
Check version of shell. Check version of shell.
stdin: stdin:
@ -39,7 +39,7 @@ name: KSH_VERSION
category: shell:legacy-no category: shell:legacy-no
--- ---
expected-stdout: expected-stdout:
@(#)LEGACY KSH R54 2017/03/21 @(#)LEGACY KSH R54 2017/03/25
description: description:
Check version of legacy shell. Check version of legacy shell.
stdin: stdin:

8
eval.c
View File

@ -23,7 +23,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.198 2017/03/11 22:49:55 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.199 2017/03/26 00:10:23 tg Exp $");
/* /*
* string expansion * string expansion
@ -1196,7 +1196,7 @@ varsub(Expand *xp, const char *sp, const char *word,
} else if (ctype(c, C_SUBOP1)) { } else if (ctype(c, C_SUBOP1)) {
slen += 2; slen += 2;
stype |= c; stype |= c;
} else if (ctype(c, C_SUBOP2)) { } else if (ksh_issubop2(c)) {
/* Note: ksh88 allows :%, :%%, etc */ /* Note: ksh88 allows :%, :%%, etc */
slen += 2; slen += 2;
stype = c; stype = c;
@ -1304,7 +1304,7 @@ varsub(Expand *xp, const char *sp, const char *word,
c = stype & 0x7F; c = stype & 0x7F;
/* test the compiler's code generator */ /* test the compiler's code generator */
if (((stype < 0x100) && (ctype(c, C_SUBOP2) || if (((stype < 0x100) && (ksh_issubop2(c) ||
(((stype & 0x80) ? *xp->str == '\0' : xp->str == null) && (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
(state != XARG || (ifs0 || xp->split ? (state != XARG || (ifs0 || xp->split ?
(xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ? (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
@ -1314,7 +1314,7 @@ varsub(Expand *xp, const char *sp, const char *word,
/* expand word instead of variable value */ /* expand word instead of variable value */
state = XBASE; state = XBASE;
if (Flag(FNOUNSET) && xp->str == null && !zero_ok && if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
(ctype(c, C_SUBOP2) || (state != XBASE && c != '+'))) (ksh_issubop2(c) || (state != XBASE && c != '+')))
errorf(Tf_parm, sp); errorf(Tf_parm, sp);
*stypep = stype; *stypep = stype;
*slenp = slen; *slenp = slen;

9
expr.c
View File

@ -2,7 +2,7 @@
/*- /*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
* 2011, 2012, 2013, 2014, 2016 * 2011, 2012, 2013, 2014, 2016, 2017
* mirabilos <m@mirbsd.org> * mirabilos <m@mirbsd.org>
* *
* Provided that these terms and disclaimer and all copyright notices * Provided that these terms and disclaimer and all copyright notices
@ -23,7 +23,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.90 2016/11/07 16:58:48 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.91 2017/03/26 00:10:23 tg Exp $");
#define EXPRTOK_DEFNS #define EXPRTOK_DEFNS
#include "exprtok.h" #include "exprtok.h"
@ -572,8 +572,9 @@ exprtoken(Expr_state *es)
if (c == '\0') if (c == '\0')
es->tok = END; es->tok = END;
else if (ksh_isalphx(c)) { else if (ksh_isalphx(c)) {
for (; ksh_isalnux(c); c = *cp) do {
cp++; c = *++cp;
} while (ksh_isalnux(c));
if (c == '[') { if (c == '[') {
size_t len; size_t len;

4
lex.c
View File

@ -23,7 +23,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.231 2017/03/22 00:20:43 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.232 2017/03/26 00:10:24 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@ -489,7 +489,7 @@ yylex(int cf)
* If this is a trim operation, * If this is a trim operation,
* treat (,|,) specially in STBRACE. * treat (,|,) specially in STBRACE.
*/ */
if (ctype(c, C_SUBOP2)) { if (ksh_issubop2(c)) {
ungetsc(c); ungetsc(c);
if (Flag(FSH)) if (Flag(FSH))
PUSH_STATE(STBRACEBOURNE); PUSH_STATE(STBRACEBOURNE);

9
misc.c
View File

@ -30,7 +30,7 @@
#include <grp.h> #include <grp.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.252 2017/03/11 23:56:17 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.253 2017/03/26 00:10:24 tg Exp $");
#define KSH_CHVT_FLAG #define KSH_CHVT_FLAG
#ifdef MKSH_SMALL #ifdef MKSH_SMALL
@ -89,11 +89,10 @@ setctypes(const char *s, int t)
void void
initctypes(void) initctypes(void)
{ {
setctypes(letters_uc, C_ALPHA); setctypes(letters_uc, C_ALPHX);
setctypes(letters_lc, C_ALPHA); setctypes(letters_lc, C_ALPHX);
chtypes['_'] |= C_ALPHA; chtypes['_'] |= C_ALPHX;
setctypes("0123456789", C_DIGIT); setctypes("0123456789", C_DIGIT);
/* \0 added automatically */
setctypes(TC_LEX1, C_LEX1); setctypes(TC_LEX1, C_LEX1);
setctypes("*@#!$-?", C_VAR1); setctypes("*@#!$-?", C_VAR1);
setctypes(TC_IFSWS, C_IFSWS); setctypes(TC_IFSWS, C_IFSWS);

21
sh.h
View File

@ -175,9 +175,9 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.797 2017/03/22 00:20:53 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.798 2017/03/26 00:10:25 tg Exp $");
#endif #endif
#define MKSH_VERSION "R54 2017/03/21" #define MKSH_VERSION "R54 2017/03/25"
/* arithmetic types: C implementation */ /* arithmetic types: C implementation */
#if !HAVE_CAN_INTTYPES #if !HAVE_CAN_INTTYPES
@ -1243,7 +1243,7 @@ EXTERN bool really_exit;
/* /*
* fast character classes * fast character classes
*/ */
#define C_ALPHA BIT(0) /* a-z_A-Z */ #define C_ALPHX BIT(0) /* A-Za-z_ */
#define C_DIGIT BIT(1) /* 0-9 */ #define C_DIGIT BIT(1) /* 0-9 */
#define C_LEX1 BIT(2) /* \t \n\0|&;<>() */ #define C_LEX1 BIT(2) /* \t \n\0|&;<>() */
#define C_VAR1 BIT(3) /* *@#!$-? */ #define C_VAR1 BIT(3) /* *@#!$-? */
@ -1251,17 +1251,16 @@ EXTERN bool really_exit;
#define C_SUBOP1 BIT(5) /* "=-+?" */ #define C_SUBOP1 BIT(5) /* "=-+?" */
#define C_QUOTE BIT(6) /* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */ #define C_QUOTE BIT(6) /* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */
#define C_IFS BIT(7) /* $IFS */ #define C_IFS BIT(7) /* $IFS */
#define C_SUBOP2 BIT(8) /* "#%" (magic, see below) */
extern unsigned char chtypes[]; extern unsigned char chtypes[];
#define ctype(c, t) tobool( ((t) == C_SUBOP2) ? \ #define ctype(c, t) tobool(chtypes[(unsigned char)(c)] & (t))
(((c) == '#' || (c) == '%') ? 1 : 0) : \
(chtypes[(unsigned char)(c)] & (t)) )
#define ord(c) ((int)(unsigned char)(c)) #define ord(c) ((int)(unsigned char)(c))
#define ksh_isalphx(c) ctype((c), C_ALPHA) #define ksh_issubop2(c) tobool((c) == ord('#') || (c) == ord('%'))
#define ksh_isalnux(c) ctype((c), C_ALPHA | C_DIGIT) #define ksh_isalpha(c) (ctype((c), C_ALPHX) && (c) != ord('_'))
#define ksh_isdigit(c) (((c) >= '0') && ((c) <= '9')) #define ksh_isalphx(c) ctype((c), C_ALPHX)
#define ksh_isalnux(c) ctype((c), C_ALPHX | C_DIGIT)
#define ksh_isdigit(c) ctype((c), C_DIGIT)
#define ksh_islower(c) (((c) >= 'a') && ((c) <= 'z')) #define ksh_islower(c) (((c) >= 'a') && ((c) <= 'z'))
#define ksh_isupper(c) (((c) >= 'A') && ((c) <= 'Z')) #define ksh_isupper(c) (((c) >= 'A') && ((c) <= 'Z'))
#define ksh_tolower(c) (ksh_isupper(c) ? (c) - 'A' + 'a' : (c)) #define ksh_tolower(c) (ksh_isupper(c) ? (c) - 'A' + 'a' : (c))
@ -2424,7 +2423,7 @@ extern int tty_init_fd(void); /* initialise tty_fd, tty_devtty */
#define mksh_abspath(s) __extension__({ \ #define mksh_abspath(s) __extension__({ \
const char *mksh_abspath_s = (s); \ const char *mksh_abspath_s = (s); \
(mksh_cdirsep(mksh_abspath_s[0]) || \ (mksh_cdirsep(mksh_abspath_s[0]) || \
(ksh_isalphx(mksh_abspath_s[0]) && \ (ksh_isalpha(mksh_abspath_s[0]) && \
mksh_abspath_s[1] == ':')); \ mksh_abspath_s[1] == ':')); \
}) })
#define mksh_cdirsep(c) __extension__({ \ #define mksh_cdirsep(c) __extension__({ \

9
var.c
View File

@ -2,7 +2,7 @@
/*- /*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
* 2011, 2012, 2013, 2014, 2015, 2016 * 2011, 2012, 2013, 2014, 2015, 2016, 2017
* mirabilos <m@mirbsd.org> * mirabilos <m@mirbsd.org>
* *
* Provided that these terms and disclaimer and all copyright notices * Provided that these terms and disclaimer and all copyright notices
@ -28,7 +28,7 @@
#include <sys/sysctl.h> #include <sys/sysctl.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.209 2016/11/11 23:31:39 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/var.c,v 1.210 2017/03/26 00:10:26 tg Exp $");
/*- /*-
* Variables * Variables
@ -1053,8 +1053,9 @@ skip_varname(const char *s, bool aok)
size_t alen; size_t alen;
if (s && ksh_isalphx(*s)) { if (s && ksh_isalphx(*s)) {
while (*++s && ksh_isalnux(*s)) do {
; ++s;
} while (ksh_isalnux(*s));
if (aok && *s == '[' && (alen = array_ref_len(s))) if (aok && *s == '[' && (alen = array_ref_len(s)))
s += alen; s += alen;
} }