small character classes overhaul:
• make fast character classes even faster by removing the C_SUBOP2 hack in favour of a separate, seldom-used ksh_issubop2 macro, which is a slower character class (no change there); this also makes ctype() side-effect-safe
• optimise cases of ksh_isalphx followed by a ksh_isalnux loop (used when parsing variable names)
• remove a misleading comment in initctypes() about \0, inherited from pdksh
• rename C_ALPHA to C_ALPHX to make it clearer that the underscore is included
• sprinkle a few ord() in there
• add new ksh_isalpha(), which tests for [A-Za-z] (slow character class)
• there is no '_:\' drive on OS/2 (which inspired the whole changeset)
This commit is contained in:
		
							
								
								
									
										6
									
								
								check.t
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								check.t
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | |||||||
| # $MirOS: src/bin/mksh/check.t,v 1.765 2017/03/22 00:20:39 tg Exp $ | # $MirOS: src/bin/mksh/check.t,v 1.766 2017/03/26 00:10:21 tg Exp $ | ||||||
| # -*- mode: sh -*- | # -*- mode: sh -*- | ||||||
| #- | #- | ||||||
| # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, | # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, | ||||||
| @@ -30,7 +30,7 @@ | |||||||
| # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date | # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date | ||||||
|  |  | ||||||
| expected-stdout: | expected-stdout: | ||||||
| 	@(#)MIRBSD KSH R54 2017/03/21 | 	@(#)MIRBSD KSH R54 2017/03/25 | ||||||
| description: | description: | ||||||
| 	Check version of shell. | 	Check version of shell. | ||||||
| stdin: | stdin: | ||||||
| @@ -39,7 +39,7 @@ name: KSH_VERSION | |||||||
| category: shell:legacy-no | category: shell:legacy-no | ||||||
| --- | --- | ||||||
| expected-stdout: | expected-stdout: | ||||||
| 	@(#)LEGACY KSH R54 2017/03/21 | 	@(#)LEGACY KSH R54 2017/03/25 | ||||||
| description: | description: | ||||||
| 	Check version of legacy shell. | 	Check version of legacy shell. | ||||||
| stdin: | stdin: | ||||||
|   | |||||||
							
								
								
									
										8
									
								
								eval.c
									
									
									
									
									
								
							
							
						
						
									
										8
									
								
								eval.c
									
									
									
									
									
								
							| @@ -23,7 +23,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.198 2017/03/11 22:49:55 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.199 2017/03/26 00:10:23 tg Exp $"); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * string expansion |  * string expansion | ||||||
| @@ -1196,7 +1196,7 @@ varsub(Expand *xp, const char *sp, const char *word, | |||||||
| 	} else if (ctype(c, C_SUBOP1)) { | 	} else if (ctype(c, C_SUBOP1)) { | ||||||
| 		slen += 2; | 		slen += 2; | ||||||
| 		stype |= c; | 		stype |= c; | ||||||
| 	} else if (ctype(c, C_SUBOP2)) { | 	} else if (ksh_issubop2(c)) { | ||||||
| 		/* Note: ksh88 allows :%, :%%, etc */ | 		/* Note: ksh88 allows :%, :%%, etc */ | ||||||
| 		slen += 2; | 		slen += 2; | ||||||
| 		stype = c; | 		stype = c; | ||||||
| @@ -1304,7 +1304,7 @@ varsub(Expand *xp, const char *sp, const char *word, | |||||||
|  |  | ||||||
| 	c = stype & 0x7F; | 	c = stype & 0x7F; | ||||||
| 	/* test the compiler's code generator */ | 	/* test the compiler's code generator */ | ||||||
| 	if (((stype < 0x100) && (ctype(c, C_SUBOP2) || | 	if (((stype < 0x100) && (ksh_issubop2(c) || | ||||||
| 	    (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) && | 	    (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) && | ||||||
| 	    (state != XARG || (ifs0 || xp->split ? | 	    (state != XARG || (ifs0 || xp->split ? | ||||||
| 	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ? | 	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ? | ||||||
| @@ -1314,7 +1314,7 @@ varsub(Expand *xp, const char *sp, const char *word, | |||||||
| 		/* expand word instead of variable value */ | 		/* expand word instead of variable value */ | ||||||
| 		state = XBASE; | 		state = XBASE; | ||||||
| 	if (Flag(FNOUNSET) && xp->str == null && !zero_ok && | 	if (Flag(FNOUNSET) && xp->str == null && !zero_ok && | ||||||
| 	    (ctype(c, C_SUBOP2) || (state != XBASE && c != '+'))) | 	    (ksh_issubop2(c) || (state != XBASE && c != '+'))) | ||||||
| 		errorf(Tf_parm, sp); | 		errorf(Tf_parm, sp); | ||||||
| 	*stypep = stype; | 	*stypep = stype; | ||||||
| 	*slenp = slen; | 	*slenp = slen; | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								expr.c
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								expr.c
									
									
									
									
									
								
							| @@ -2,7 +2,7 @@ | |||||||
|  |  | ||||||
| /*- | /*- | ||||||
|  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, |  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, | ||||||
|  *		 2011, 2012, 2013, 2014, 2016 |  *		 2011, 2012, 2013, 2014, 2016, 2017 | ||||||
|  *	mirabilos <m@mirbsd.org> |  *	mirabilos <m@mirbsd.org> | ||||||
|  * |  * | ||||||
|  * Provided that these terms and disclaimer and all copyright notices |  * Provided that these terms and disclaimer and all copyright notices | ||||||
| @@ -23,7 +23,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.90 2016/11/07 16:58:48 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.91 2017/03/26 00:10:23 tg Exp $"); | ||||||
|  |  | ||||||
| #define EXPRTOK_DEFNS | #define EXPRTOK_DEFNS | ||||||
| #include "exprtok.h" | #include "exprtok.h" | ||||||
| @@ -572,8 +572,9 @@ exprtoken(Expr_state *es) | |||||||
| 	if (c == '\0') | 	if (c == '\0') | ||||||
| 		es->tok = END; | 		es->tok = END; | ||||||
| 	else if (ksh_isalphx(c)) { | 	else if (ksh_isalphx(c)) { | ||||||
| 		for (; ksh_isalnux(c); c = *cp) | 		do { | ||||||
| 			cp++; | 			c = *++cp; | ||||||
|  | 		} while (ksh_isalnux(c)); | ||||||
| 		if (c == '[') { | 		if (c == '[') { | ||||||
| 			size_t len; | 			size_t len; | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								lex.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								lex.c
									
									
									
									
									
								
							| @@ -23,7 +23,7 @@ | |||||||
|  |  | ||||||
| #include "sh.h" | #include "sh.h" | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.231 2017/03/22 00:20:43 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.232 2017/03/26 00:10:24 tg Exp $"); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * states while lexing word |  * states while lexing word | ||||||
| @@ -489,7 +489,7 @@ yylex(int cf) | |||||||
| 					 * If this is a trim operation, | 					 * If this is a trim operation, | ||||||
| 					 * treat (,|,) specially in STBRACE. | 					 * treat (,|,) specially in STBRACE. | ||||||
| 					 */ | 					 */ | ||||||
| 					if (ctype(c, C_SUBOP2)) { | 					if (ksh_issubop2(c)) { | ||||||
| 						ungetsc(c); | 						ungetsc(c); | ||||||
| 						if (Flag(FSH)) | 						if (Flag(FSH)) | ||||||
| 							PUSH_STATE(STBRACEBOURNE); | 							PUSH_STATE(STBRACEBOURNE); | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								misc.c
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								misc.c
									
									
									
									
									
								
							| @@ -30,7 +30,7 @@ | |||||||
| #include <grp.h> | #include <grp.h> | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.252 2017/03/11 23:56:17 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.253 2017/03/26 00:10:24 tg Exp $"); | ||||||
|  |  | ||||||
| #define KSH_CHVT_FLAG | #define KSH_CHVT_FLAG | ||||||
| #ifdef MKSH_SMALL | #ifdef MKSH_SMALL | ||||||
| @@ -89,11 +89,10 @@ setctypes(const char *s, int t) | |||||||
| void | void | ||||||
| initctypes(void) | initctypes(void) | ||||||
| { | { | ||||||
| 	setctypes(letters_uc, C_ALPHA); | 	setctypes(letters_uc, C_ALPHX); | ||||||
| 	setctypes(letters_lc, C_ALPHA); | 	setctypes(letters_lc, C_ALPHX); | ||||||
| 	chtypes['_'] |= C_ALPHA; | 	chtypes['_'] |= C_ALPHX; | ||||||
| 	setctypes("0123456789", C_DIGIT); | 	setctypes("0123456789", C_DIGIT); | ||||||
| 	/* \0 added automatically */ |  | ||||||
| 	setctypes(TC_LEX1, C_LEX1); | 	setctypes(TC_LEX1, C_LEX1); | ||||||
| 	setctypes("*@#!$-?", C_VAR1); | 	setctypes("*@#!$-?", C_VAR1); | ||||||
| 	setctypes(TC_IFSWS, C_IFSWS); | 	setctypes(TC_IFSWS, C_IFSWS); | ||||||
|   | |||||||
							
								
								
									
										21
									
								
								sh.h
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								sh.h
									
									
									
									
									
								
							| @@ -175,9 +175,9 @@ | |||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #ifdef EXTERN | #ifdef EXTERN | ||||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.797 2017/03/22 00:20:53 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.798 2017/03/26 00:10:25 tg Exp $"); | ||||||
| #endif | #endif | ||||||
| #define MKSH_VERSION "R54 2017/03/21" | #define MKSH_VERSION "R54 2017/03/25" | ||||||
|  |  | ||||||
| /* arithmetic types: C implementation */ | /* arithmetic types: C implementation */ | ||||||
| #if !HAVE_CAN_INTTYPES | #if !HAVE_CAN_INTTYPES | ||||||
| @@ -1243,7 +1243,7 @@ EXTERN bool really_exit; | |||||||
| /* | /* | ||||||
|  * fast character classes |  * fast character classes | ||||||
|  */ |  */ | ||||||
| #define C_ALPHA	 BIT(0)		/* a-z_A-Z */ | #define C_ALPHX	 BIT(0)		/* A-Za-z_ */ | ||||||
| #define C_DIGIT	 BIT(1)		/* 0-9 */ | #define C_DIGIT	 BIT(1)		/* 0-9 */ | ||||||
| #define C_LEX1	 BIT(2)		/* \t \n\0|&;<>() */ | #define C_LEX1	 BIT(2)		/* \t \n\0|&;<>() */ | ||||||
| #define C_VAR1	 BIT(3)		/* *@#!$-? */ | #define C_VAR1	 BIT(3)		/* *@#!$-? */ | ||||||
| @@ -1251,17 +1251,16 @@ EXTERN bool really_exit; | |||||||
| #define C_SUBOP1 BIT(5)		/* "=-+?" */ | #define C_SUBOP1 BIT(5)		/* "=-+?" */ | ||||||
| #define C_QUOTE	 BIT(6)		/* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */ | #define C_QUOTE	 BIT(6)		/* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */ | ||||||
| #define C_IFS	 BIT(7)		/* $IFS */ | #define C_IFS	 BIT(7)		/* $IFS */ | ||||||
| #define C_SUBOP2 BIT(8)		/* "#%" (magic, see below) */ |  | ||||||
|  |  | ||||||
| extern unsigned char chtypes[]; | extern unsigned char chtypes[]; | ||||||
|  |  | ||||||
| #define ctype(c, t)	tobool( ((t) == C_SUBOP2) ?			\ | #define ctype(c, t)	tobool(chtypes[(unsigned char)(c)] & (t)) | ||||||
| 			    (((c) == '#' || (c) == '%') ? 1 : 0) :	\ |  | ||||||
| 			    (chtypes[(unsigned char)(c)] & (t)) ) |  | ||||||
| #define ord(c)		((int)(unsigned char)(c)) | #define ord(c)		((int)(unsigned char)(c)) | ||||||
| #define ksh_isalphx(c)	ctype((c), C_ALPHA) | #define ksh_issubop2(c)	tobool((c) == ord('#') || (c) == ord('%')) | ||||||
| #define ksh_isalnux(c)	ctype((c), C_ALPHA | C_DIGIT) | #define ksh_isalpha(c)	(ctype((c), C_ALPHX) && (c) != ord('_')) | ||||||
| #define ksh_isdigit(c)	(((c) >= '0') && ((c) <= '9')) | #define ksh_isalphx(c)	ctype((c), C_ALPHX) | ||||||
|  | #define ksh_isalnux(c)	ctype((c), C_ALPHX | C_DIGIT) | ||||||
|  | #define ksh_isdigit(c)	ctype((c), C_DIGIT) | ||||||
| #define ksh_islower(c)	(((c) >= 'a') && ((c) <= 'z')) | #define ksh_islower(c)	(((c) >= 'a') && ((c) <= 'z')) | ||||||
| #define ksh_isupper(c)	(((c) >= 'A') && ((c) <= 'Z')) | #define ksh_isupper(c)	(((c) >= 'A') && ((c) <= 'Z')) | ||||||
| #define ksh_tolower(c)	(ksh_isupper(c) ? (c) - 'A' + 'a' : (c)) | #define ksh_tolower(c)	(ksh_isupper(c) ? (c) - 'A' + 'a' : (c)) | ||||||
| @@ -2424,7 +2423,7 @@ extern int tty_init_fd(void);	/* initialise tty_fd, tty_devtty */ | |||||||
| #define mksh_abspath(s)			__extension__({			\ | #define mksh_abspath(s)			__extension__({			\ | ||||||
| 	const char *mksh_abspath_s = (s);				\ | 	const char *mksh_abspath_s = (s);				\ | ||||||
| 	(mksh_cdirsep(mksh_abspath_s[0]) ||				\ | 	(mksh_cdirsep(mksh_abspath_s[0]) ||				\ | ||||||
| 	    (ksh_isalphx(mksh_abspath_s[0]) &&				\ | 	    (ksh_isalpha(mksh_abspath_s[0]) &&				\ | ||||||
| 	    mksh_abspath_s[1] == ':'));					\ | 	    mksh_abspath_s[1] == ':'));					\ | ||||||
| }) | }) | ||||||
| #define mksh_cdirsep(c)			__extension__({			\ | #define mksh_cdirsep(c)			__extension__({			\ | ||||||
|   | |||||||
							
								
								
									
										9
									
								
								var.c
									
									
									
									
									
								
							
							
						
						
									
										9
									
								
								var.c
									
									
									
									
									
								
							| @@ -2,7 +2,7 @@ | |||||||
|  |  | ||||||
| /*- | /*- | ||||||
|  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, |  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, | ||||||
|  *		 2011, 2012, 2013, 2014, 2015, 2016 |  *		 2011, 2012, 2013, 2014, 2015, 2016, 2017 | ||||||
|  *	mirabilos <m@mirbsd.org> |  *	mirabilos <m@mirbsd.org> | ||||||
|  * |  * | ||||||
|  * Provided that these terms and disclaimer and all copyright notices |  * Provided that these terms and disclaimer and all copyright notices | ||||||
| @@ -28,7 +28,7 @@ | |||||||
| #include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| __RCSID("$MirOS: src/bin/mksh/var.c,v 1.209 2016/11/11 23:31:39 tg Exp $"); | __RCSID("$MirOS: src/bin/mksh/var.c,v 1.210 2017/03/26 00:10:26 tg Exp $"); | ||||||
|  |  | ||||||
| /*- | /*- | ||||||
|  * Variables |  * Variables | ||||||
| @@ -1053,8 +1053,9 @@ skip_varname(const char *s, bool aok) | |||||||
| 	size_t alen; | 	size_t alen; | ||||||
|  |  | ||||||
| 	if (s && ksh_isalphx(*s)) { | 	if (s && ksh_isalphx(*s)) { | ||||||
| 		while (*++s && ksh_isalnux(*s)) | 		do { | ||||||
| 			; | 			++s; | ||||||
|  | 		} while (ksh_isalnux(*s)); | ||||||
| 		if (aok && *s == '[' && (alen = array_ref_len(s))) | 		if (aok && *s == '[' && (alen = array_ref_len(s))) | ||||||
| 			s += alen; | 			s += alen; | ||||||
| 	} | 	} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user