From b228c59895a23cd51c0badc435dc43335fb43503 Mon Sep 17 00:00:00 2001 From: tg Date: Thu, 27 Apr 2017 19:16:10 +0000 Subject: [PATCH] convert to the new fast character classes --- eval.c | 4 ++-- main.c | 52 ++++++++++++++++++++++++++++--------------- misc.c | 33 +--------------------------- sh.h | 69 +++++++++++++++++++++------------------------------------- shf.c | 8 +++++-- 5 files changed, 68 insertions(+), 98 deletions(-) diff --git a/eval.c b/eval.c index 5a1760c..35f2e63 100644 --- a/eval.c +++ b/eval.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.203 2017/04/22 00:07:08 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.204 2017/04/27 19:16:07 tg Exp $"); /* * string expansion @@ -1224,7 +1224,7 @@ varsub(Expand *xp, const char *sp, const char *word, } } else if (stype == 0x80 && (c == ' ' || c == '0')) { stype |= '0'; - } else if (ctype(c, C_SUBOP1)) { + } else if (ctype(c, C_SUB1)) { slen += 2; stype |= c; } else if (ksh_issubop2(c)) { diff --git a/main.c b/main.c index 0841efd..6278cec 100644 --- a/main.c +++ b/main.c @@ -34,7 +34,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.335 2017/04/27 18:44:35 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.336 2017/04/27 19:16:08 tg Exp $"); extern char **environ; @@ -236,6 +236,8 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp) ssize_t k; #endif + set_ifs(TC_IFSWS); + #ifdef __OS2__ for (i = 0; i < 3; ++i) if (!isatty(i)) @@ -333,9 +335,6 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp) initvar(); - /*XXX do this earlier, just call set_ifs(TC_IFSWS); with the new scheme, then ifs0 need not be E_INIT’d, drop initctypes and setctypes from misc.c/sh.h then */ - initctypes(); - inittraps(); coproc_init(); @@ -431,23 +430,40 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp) shellf(" .\n"); \ } while (0) #define dmpct(a,b) dmpcf(a,ctype(c,b)) +dmpct("C_ALIAS",C_ALIAS); 
+dmpct("C_ALNUM",C_ALNUM); +dmpct("C_ALNUX",C_ALNUX); +dmpct("C_ALPHA",C_ALPHA); dmpct("C_ALPHX",C_ALPHX); +dmpct("C_BLANK",C_BLANK); +dmpct("C_CFS",C_CFS); +dmpct("C_CNTRL",C_CNTRL); dmpct("C_DIGIT",C_DIGIT); -dmpct("C_LEX1",C_LEX1); -dmpct("C_VAR1",C_VAR1); -dmpct("C_IFSWS",C_IFSWS); -dmpct("C_SUB1",C_SUBOP1); -dmpct("C_QUOTE",C_QUOTE); +dmpct("C_DOLAR",C_DOLAR); +dmpct("C_GRAPH",C_GRAPH); +dmpct("C_HEXLT",C_HEXLT); dmpct("C_IFS",C_IFS); -dmpcf("C_SUB2",ksh_issubop2(c)); -dmpcf("C_ALIAS",ksh_isalias(c)); -dmpcf("C_ALPHA",ksh_isalpha(c)); -dmpcf("C_ALNUX",ksh_isalnux(c)); -dmpcf("C_LOWER",ksh_islower(c)); -dmpcf("C_UPPER",ksh_isupper(c)); -dmpcf("C_SPACE",ksh_isspace(c)); -dmpcf("C_CFS",is_cfs(c)); -dmpcf("C_MFS",is_mfs(c)); +dmpct("C_IFSWS",C_IFSWS); +dmpct("C_LEX1",C_LEX1); +dmpct("C_LF",C_LF); +dmpct("C_LOWER",C_LOWER); +dmpct("C_MFS",C_MFS); +dmpct("C_NL",C_NL); +dmpct("C_NUL",C_NUL); +dmpct("C_OCTAL",C_OCTAL); +dmpct("C_PRINT",C_PRINT); +dmpct("C_PUNCT",C_PUNCT); +dmpct("C_QC",C_QC); +dmpct("C_QUOTE",C_QUOTE); +dmpct("C_SEDEC",C_SEDEC); +dmpct("C_SPACE",C_SPACE); +dmpct("C_SPC",C_SPC); +dmpct("C_SUB1",C_SUB1); +dmpct("C_SUB2",C_SUB2); +dmpct("C_TAB",C_TAB); +dmpct("C_UNDER",C_UNDER); +dmpct("C_UPPER",C_UPPER); +dmpct("C_VAR1",C_VAR1); exit(0); /* assign default shell variable values */ diff --git a/misc.c b/misc.c index d072862..bd1df73 100644 --- a/misc.c +++ b/misc.c @@ -30,7 +30,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.258 2017/04/21 20:06:05 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.259 2017/04/27 19:16:08 tg Exp $"); #define KSH_CHVT_FLAG #ifdef MKSH_SMALL @@ -68,37 +68,6 @@ static int make_path(const char *, const char *, char **, XString *, int *); #define DO_SETUID(func, argvec) func argvec #endif -/* - * Fast character classes - */ -void -setctypes(const char *s, int t) -{ - if (t & C_IFS) { - unsigned int i = 0; - - while (++i <= UCHAR_MAX) - chtypes[i] &= ~C_IFS; - /* include '\0' in C_IFS */ - chtypes[0] |= C_IFS; 
- } - while (*s != 0) - chtypes[(unsigned char)*s++] |= t; -} - -void -initctypes(void) -{ - setctypes(letters_uc, C_ALPHX); - setctypes(letters_lc, C_ALPHX); - chtypes['_'] |= C_ALPHX; - setctypes("0123456789", C_DIGIT); - setctypes(TC_LEX1, C_LEX1); - setctypes("*@#!$-?", C_VAR1); - setctypes(TC_IFSWS, C_IFSWS); - setctypes("=-+?", C_SUBOP1); - setctypes("\t\n \"#$&'()*;<=>?[\\]`|", C_QUOTE); -} /* called from XcheckN() to grow buffer */ char * diff --git a/sh.h b/sh.h index 20d5c74..9a56a55 100644 --- a/sh.h +++ b/sh.h @@ -175,7 +175,7 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.814 2017/04/22 00:07:09 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.815 2017/04/27 19:16:09 tg Exp $"); #endif #define MKSH_VERSION "R55 2017/04/20" @@ -533,8 +533,6 @@ EXTERN const char initvsn[] E_INIT("KSH_VERSION=@(#)" KSH_VERSIONNAME_ISLEGACY \ EXTERN const char digits_uc[] E_INIT("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); EXTERN const char digits_lc[] E_INIT("0123456789abcdefghijklmnopqrstuvwxyz"); -#define letters_uc (digits_uc + 10) -#define letters_lc (digits_lc + 10) /* * Evil hack for const correctness due to API brokenness @@ -868,8 +866,7 @@ EXTERN char null[] E_INIT(""); EXTERN const char T4spaces[] E_INIT(" "); #define T1space (Treal_sp2 + 5) #define Tcolsp (Tf_sD_ + 2) -EXTERN const char TC_LEX1[] E_INIT("|&;<>() \t\n"); -#define TC_IFSWS (TC_LEX1 + 7) +#define TC_IFSWS (TinitIFS + 4) EXTERN const char TinitIFS[] E_INIT("IFS= \t\n"); EXTERN const char TFCEDIT_dollaru[] E_INIT("${FCEDIT:-/bin/ed} $_"); #define Tspdollaru (TFCEDIT_dollaru + 18) @@ -1030,7 +1027,6 @@ EXTERN const char T_devtty[] E_INIT("/dev/tty"); #define T4spaces " " #define T1space " " #define Tcolsp ": " -#define TC_LEX1 "|&;<>() \t\n" #define TC_IFSWS " \t\n" #define TinitIFS "IFS= \t\n" #define TFCEDIT_dollaru "${FCEDIT:-/bin/ed} $_" @@ -1321,8 +1317,9 @@ EXTERN bool really_exit; extern const uint32_t tpl_ctypes[128]; /* run-time, contains C_IFS as well, full 2⁸ octet range */ 
EXTERN uint32_t ksh_ctypes[256]; +/* first octet of $IFS, for concatenating "$*" */ +EXTERN char ifs0; -#if 0 /* external types */ /* 0‥9A‥Za‥z!%,-.@ valid characters in alias name */ @@ -1391,50 +1388,36 @@ EXTERN uint32_t ksh_ctypes[256]; #define C_SPC CiSP /* \x20 ASCII space */ #define C_TAB CiTAB /* \x09 ASCII horizontal tabulator */ #define C_UNDER CiUNDER /* _ underscore */ -#endif -/* legacy not-so-fast character classes */ - -#define C_ALPHX BIT(0) /* A-Za-z_ */ -#define C_DIGIT BIT(1) /* 0-9 */ -#define C_LEX1 BIT(2) /* \t \n\0|&;<>() */ -#define C_VAR1 BIT(3) /* *@#!$-? */ -#define C_IFSWS BIT(4) /* \t \n (IFS white space) */ -#define C_SUBOP1 BIT(5) /* "=-+?" */ -#define C_QUOTE BIT(6) /* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */ -#define C_IFS BIT(7) /* $IFS */ - -extern unsigned char chtypes[]; - -#define ctype(c, t) tobool(chtypes[(unsigned char)(c)] & (t)) +/* identity transform of octet */ #define ord(c) ((unsigned int)(unsigned char)(c)) -/* identity transformation in !EBCDIC; Unicode map (or higher) in EBCDIC */ +/* identity transformation in !EBCDIC; ASCII or high in EBCDIC */ #define asc(c) ord(c) -#define ksh_issubop2(c) tobool(ord(c) == ord('#') || ord(c) == ord('%')) -#define ksh_isalias(c) (ctype((c), C_ALPHX | C_DIGIT) || \ - ord(c) == ord('!') || ord(c) == ord('%') || \ - ord(c) == ord(',') || ord(c) == ord('.') || \ - ord(c) == ord('@') || ord(c) == ord('-')) -#define ksh_isalpha(c) (ctype((c), C_ALPHX) && ord(c) != ord('_')) -#define ksh_isalphx(c) ctype((c), C_ALPHX) -#define ksh_isalnux(c) ctype((c), C_ALPHX | C_DIGIT) -#define ksh_isdigit(c) ctype((c), C_DIGIT) -#define ksh_islower(c) ((asc(c) >= asc('a')) && (asc(c) <= asc('z'))) -#define ksh_isupper(c) ((asc(c) >= asc('A')) && (asc(c) <= asc('Z'))) +/* EBCDIC needs to compare c with both u and l */ +#define ksh_eq(c,u,l) (((c) | 0x20) == (l)) +/* new fast character classes */ +#define ctype(c, t) tobool(ksh_ctypes[ord(c)] & (t)) +/* helper functions */ +#define ksh_isdash(s) 
tobool(ord((s)[0]) == '-' && ord((s)[1]) == '\0') +/* invariant distance even in EBCDIC */ #define ksh_tolower(c) (ksh_isupper(c) ? (c) - 'A' + 'a' : (c)) #define ksh_toupper(c) (ksh_islower(c) ? (c) - 'a' + 'A' : (c)) -#define ksh_isdash(s) (((s)[0] == '-') && ((s)[1] == '\0')) -#define ksh_isspace(c) ((((c) >= 0x09) && ((c) <= 0x0D)) || ((c) == 0x20)) -/* EBCDIC needs to compare c with both */ -#define ksh_eq(c,u,l) (((c) | 0x20) == (l)) /* strictly speaking asc() here, but this works even in EBCDIC */ #define ksh_numdig(c) (ord(c) - ord('0')) #define ksh_numuc(c) (asc(c) - asc('A')) #define ksh_numlc(c) (asc(c) - asc('a')) -#define is_cfs(c) ((c) == ' ' || (c) == '\t' || (c) == '"' || (c) == '\'') /* legacy */ -#define is_mfs(c) (!(ksh_isalnux(c) || (c) == '$' || ((c) & 0x80))) /* legacy */ - -EXTERN char ifs0 E_INIT(' '); /* for "$*" */ +/* legacy functions */ +#define ksh_issubop2(c) ctype((c), C_SUB2) +#define ksh_isalias(c) ctype((c), C_ALIAS) +#define ksh_isalpha(c) ctype((c), C_ALPHA) +#define ksh_isalphx(c) ctype((c), C_ALPHX) +#define ksh_isalnux(c) ctype((c), C_ALNUX) +#define ksh_isdigit(c) ctype((c), C_DIGIT) +#define ksh_islower(c) ctype((c), C_LOWER) +#define ksh_isupper(c) ctype((c), C_UPPER) +#define ksh_isspace(c) ctype((c), C_SPACE) +#define is_cfs(c) ctype((c), C_CFS) +#define is_mfs(c) ctype((c), C_MFS) /* Argument parsing for built-in commands and getopts command */ @@ -2390,8 +2373,6 @@ void DF(const char *, ...) 
MKSH_A_FORMAT(__printf__, 1, 2);
 #endif
 /* misc.c */
-void setctypes(const char *, int);
-void initctypes(void);
 size_t option(const char *) MKSH_A_PURE;
 char *getoptions(void);
 void change_flag(enum sh_flag, int, bool);
diff --git a/shf.c b/shf.c
index a54de07..f02c53d 100644
--- a/shf.c
+++ b/shf.c
@@ -25,7 +25,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.80 2017/04/22 00:07:10 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.81 2017/04/27 19:16:10 tg Exp $");

 /* flags to shf_emptybuf() */
 #define EB_READSW	0x01	/* about to switch to reading */
@@ -1222,6 +1222,13 @@ const uint32_t tpl_ctypes[128] = {
 void
 set_ifs(const char *s)
 {
-	setctypes(s, C_IFS);
 	ifs0 = *s;
+	/* entries 0..127 come from the constant template table */
+	memcpy(ksh_ctypes, tpl_ctypes, sizeof(tpl_ctypes));
+	/* clear entries 128..255; sizeof yields octets, so offset via char * */
+	memset((char *)ksh_ctypes + sizeof(tpl_ctypes), '\0',
+	    sizeof(ksh_ctypes) - sizeof(tpl_ctypes));
+	/* flag every octet of the new IFS string (terminating NUL excluded) */
+	while (*s)
+		ksh_ctypes[ord(*s++)] |= CiIFS;
 }