reimplement has_globbing() with proper bracket expression parsing,
and already parse ahead over collating symbols, equivalence classes and character classes (heck, my first draft of this already did better than GNU bash; as for ksh93, I still don’t grok its code at all)
This commit is contained in:
		
							
								
								
									
										39
									
								
								check.t
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								check.t
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | ||||
| # $MirOS: src/bin/mksh/check.t,v 1.785 2017/04/29 15:18:25 tg Exp $ | ||||
| # $MirOS: src/bin/mksh/check.t,v 1.786 2017/04/29 21:49:04 tg Exp $ | ||||
| # -*- mode: sh -*- | ||||
| #- | ||||
| # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, | ||||
| @@ -1962,15 +1962,11 @@ expected-stdout: | ||||
| name: eglob-bad-1 | ||||
| description: | ||||
| 	Check that globbing isn't done when glob has syntax error | ||||
| file-setup: file 644 "abcx" | ||||
| file-setup: file 644 "abcz" | ||||
| file-setup: file 644 "bbc" | ||||
| file-setup: file 644 "@(a[b|)c]foo" | ||||
| stdin: | ||||
| 	echo !([*)* | ||||
| 	echo +(a|b[)* | ||||
| 	echo @(a[b|)c]* | ||||
| expected-stdout: | ||||
| 	!([*)* | ||||
| 	+(a|b[)* | ||||
| 	@(a[b|)c]* | ||||
| --- | ||||
| name: eglob-bad-2 | ||||
| description: | ||||
| @@ -2057,9 +2053,11 @@ stdin: | ||||
| 	case foo in *(a|b[)) echo yes;; *) echo no;; esac | ||||
| 	case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac | ||||
| 	case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac | ||||
| 	case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac | ||||
| expected-stdout: | ||||
| 	no | ||||
| 	yes | ||||
| 	no | ||||
| 	yes | ||||
| --- | ||||
| name: eglob-trim-1 | ||||
| @@ -2354,7 +2352,7 @@ expected-stdout: | ||||
| --- | ||||
| name: glob-bad-1 | ||||
| description: | ||||
| 	Check that globbing isn't done when glob has syntax error | ||||
| 	Check that [ matches itself if it's not a valid bracket expr | ||||
| file-setup: dir 755 "[x" | ||||
| file-setup: file 644 "[x/foo" | ||||
| stdin: | ||||
| @@ -2362,8 +2360,8 @@ stdin: | ||||
| 	echo *[x | ||||
| 	echo [x/* | ||||
| expected-stdout: | ||||
| 	[* | ||||
| 	*[x | ||||
| 	[x | ||||
| 	[x | ||||
| 	[x/foo | ||||
| --- | ||||
| name: glob-bad-2 | ||||
| @@ -2403,24 +2401,31 @@ file-setup: file 644 "abc" | ||||
| file-setup: file 644 "bbc" | ||||
| file-setup: file 644 "cbc" | ||||
| file-setup: file 644 "-bc" | ||||
| file-setup: file 644 "!bc" | ||||
| file-setup: file 644 "^bc" | ||||
| file-setup: file 644 "+bc" | ||||
| file-setup: file 644 ",bc" | ||||
| file-setup: file 644 "0bc" | ||||
| file-setup: file 644 "1bc" | ||||
| stdin: | ||||
| 	echo [ab-]* | ||||
| 	echo [-ab]* | ||||
| 	echo [!-ab]* | ||||
| 	echo [!ab]* | ||||
| 	echo []ab]* | ||||
| 	:>'./!bc' | ||||
| 	:>'./^bc' | ||||
| 	echo [^ab]* | ||||
| 	echo [!ab]* | ||||
| 	echo [+--]* | ||||
| 	echo [--1]* | ||||
|  | ||||
| expected-stdout: | ||||
| 	-bc abc bbc | ||||
| 	-bc abc bbc | ||||
| 	cbc | ||||
| 	-bc cbc | ||||
| 	!bc +bc ,bc 0bc 1bc ^bc cbc | ||||
| 	!bc +bc ,bc -bc 0bc 1bc ^bc cbc | ||||
| 	abc bbc | ||||
| 	^bc abc bbc | ||||
| 	!bc -bc ^bc cbc | ||||
| 	+bc ,bc -bc | ||||
| 	-bc 0bc 1bc | ||||
| --- | ||||
| name: glob-range-2 | ||||
| description: | ||||
|   | ||||
							
								
								
									
										4
									
								
								eval.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								eval.c
									
									
									
									
									
								
							| @@ -23,7 +23,7 @@ | ||||
|  | ||||
| #include "sh.h" | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.207 2017/04/28 00:38:29 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.208 2017/04/29 21:49:06 tg Exp $"); | ||||
|  | ||||
| /* | ||||
|  * string expansion | ||||
| @@ -1666,7 +1666,7 @@ globit(XString *xs,	/* dest string */ | ||||
| 	 * directory isn't readable - if no globbing is needed, only execute | ||||
| 	 * permission should be required (as per POSIX)). | ||||
| 	 */ | ||||
| 	if (!has_globbing(sp, se)) { | ||||
| 	if (!has_globbing(sp)) { | ||||
| 		XcheckN(*xs, xp, se - sp + 1); | ||||
| 		debunk(xp, sp, Xnleft(*xs, xp)); | ||||
| 		xp += strlen(xp); | ||||
|   | ||||
							
								
								
									
										116
									
								
								misc.c
									
									
									
									
									
								
							
							
						
						
									
										116
									
								
								misc.c
									
									
									
									
									
								
							| @@ -32,7 +32,7 @@ | ||||
| #include <grp.h> | ||||
| #endif | ||||
|  | ||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.270 2017/04/28 21:43:30 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.271 2017/04/29 21:49:07 tg Exp $"); | ||||
|  | ||||
| #define KSH_CHVT_FLAG | ||||
| #ifdef MKSH_SMALL | ||||
| @@ -634,7 +634,7 @@ gmatchx(const char *s, const char *p, bool isfile) | ||||
| 	 * isfile is false iff no syntax check has been done on | ||||
| 	 * the pattern. If check fails, just do a strcmp(). | ||||
| 	 */ | ||||
| 	if (!isfile && !has_globbing(p, pe)) { | ||||
| 	if (!isfile && !has_globbing(p)) { | ||||
| 		size_t len = pe - p + 1; | ||||
| 		char tbuf[64]; | ||||
| 		char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP); | ||||
| @@ -661,7 +661,7 @@ gmatchx(const char *s, const char *p, bool isfile) | ||||
|  * Syntax errors are: | ||||
|  *	- [ with no closing ] | ||||
|  *	- imbalanced $(...) expression | ||||
|  *	- [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d)) | ||||
|  *	- [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d)) | ||||
|  */ | ||||
| /*XXX | ||||
|  * - if no magic, | ||||
| @@ -672,60 +672,80 @@ gmatchx(const char *s, const char *p, bool isfile) | ||||
|  *	return ? | ||||
|  * - return ? | ||||
|  */ | ||||
| int | ||||
| has_globbing(const char *xp, const char *xpe) | ||||
| bool | ||||
| has_globbing(const char *pat) | ||||
| { | ||||
| 	const unsigned char *p = (const unsigned char *) xp; | ||||
| 	const unsigned char *pe = (const unsigned char *) xpe; | ||||
| 	int c; | ||||
| 	int nest = 0, bnest = 0; | ||||
| 	unsigned char c, subc; | ||||
| 	bool saw_glob = false; | ||||
| 	/* inside [...] */ | ||||
| 	bool in_bracket = false; | ||||
| 	unsigned int nest = 0; | ||||
| 	const unsigned char *p = (const unsigned char *)pat; | ||||
| 	const unsigned char *s; | ||||
|  | ||||
| 	for (; p < pe; p++) { | ||||
| 		if (!ISMAGIC(*p)) | ||||
| 	while ((c = *p++)) { | ||||
| 		/* regular character? ok. */ | ||||
| 		if (!ISMAGIC(c)) | ||||
| 			continue; | ||||
| 		if ((c = *++p) == '*' || c == '?') | ||||
| 		/* MAGIC + NUL? abort. */ | ||||
| 		if (!(c = *p++)) | ||||
| 			return (false); | ||||
| 		/* some specials */ | ||||
| 		if (c == '*' || c == '?') { | ||||
| 			/* easy glob, accept */ | ||||
| 			saw_glob = true; | ||||
| 		else if (c == '[') { | ||||
| 			if (!in_bracket) { | ||||
| 				saw_glob = true; | ||||
| 				in_bracket = true; | ||||
| 				if (ISMAGIC(p[1]) && p[2] == '!') | ||||
| 					p += 2; | ||||
| 				if (ISMAGIC(p[1]) && p[2] == ']') | ||||
| 					p += 2; | ||||
| 			} | ||||
| 			/*XXX Do we need to check ranges here? POSIX Q */ | ||||
| 		} else if (c == ']') { | ||||
| 			if (in_bracket) { | ||||
| 				if (bnest) | ||||
| 					/* [a*(b]) */ | ||||
| 					return (0); | ||||
| 				in_bracket = false; | ||||
| 		} else if (c == '[') { | ||||
| 			/* bracket expression; eat negation and initial ] */ | ||||
| 			if (ISMAGIC(p[0]) && p[1] == '!') | ||||
| 				p += 2; | ||||
| 			if (ISMAGIC(p[0]) && p[1] == ']') | ||||
| 				p += 2; | ||||
| 			/* check next string part */ | ||||
| 			s = p; | ||||
| 			while ((c = *s++)) { | ||||
| 				/* regular chars are ok */ | ||||
| 				if (!ISMAGIC(c)) | ||||
| 					continue; | ||||
| 				/* MAGIC + NUL cannot happen */ | ||||
| 				if (!(c = *s++)) | ||||
| 					return (false); | ||||
| 				/* terminating bracket? */ | ||||
| 				if (c == ']') { | ||||
| 					/* accept and continue */ | ||||
| 					p = s; | ||||
| 					saw_glob = true; | ||||
| 					break; | ||||
| 				} | ||||
| 				/* collating, equivalence or character class */ | ||||
| 				if (c == '[' && ( | ||||
| 				    *s == '.' || *s == '=' || *s == ':')) { | ||||
| 					/* must stop with exactly the same c */ | ||||
| 					subc = *s++; | ||||
| 					/* arbitrarily many chars in betwixt */ | ||||
| 					while ((c = *s++)) | ||||
| 						/* but only this sequence... */ | ||||
| 						if (c == subc && ISMAGIC(*s) && | ||||
| 						    s[1] == ']') { | ||||
| 							/* accept, terminate */ | ||||
| 							s += 2; | ||||
| 							break; | ||||
| 						} | ||||
| 					/* EOS without: reject bracket expr */ | ||||
| 					if (!c) | ||||
| 						break; | ||||
| 					/* continue; */ | ||||
| 				} | ||||
| 				/* anything else just goes on */ | ||||
| 			} | ||||
| 		} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) { | ||||
| 			/* opening pattern */ | ||||
| 			saw_glob = true; | ||||
| 			if (in_bracket) | ||||
| 				bnest++; | ||||
| 			else | ||||
| 				nest++; | ||||
| 		} else if (c == '|') { | ||||
| 			if (in_bracket && !bnest) | ||||
| 				/* *(a[foo|bar]) */ | ||||
| 				return (0); | ||||
| 		} else if (c == /*(*/ ')') { | ||||
| 			if (in_bracket) { | ||||
| 				if (!bnest--) | ||||
| 					/* *(a[b)c] */ | ||||
| 					return (0); | ||||
| 			} else if (nest) | ||||
| 				nest--; | ||||
| 			++nest; | ||||
| 		} else if (c == /*(*/')') { | ||||
| 			/* closing pattern */ | ||||
| 			if (nest) | ||||
| 				--nest; | ||||
| 		} | ||||
| 		/* else must be MAGIC followed by MAGIC or one of: ]{},!- */ | ||||
| 	} | ||||
| 	return (saw_glob && !in_bracket && !nest); | ||||
| 	return (saw_glob && !nest); | ||||
| } | ||||
|  | ||||
| /* Function must return either 0 or 1 (assumed by code for 0x80|'!') */ | ||||
| @@ -774,7 +794,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se, | ||||
| 			return (0); | ||||
|  | ||||
| 		/** | ||||
| 		 * [*+?@!](pattern|pattern|..) | ||||
| 		 * [+*?@!](pattern|pattern|..) | ||||
| 		 * This is also needed for ${..%..}, etc. | ||||
| 		 */ | ||||
|  | ||||
|   | ||||
							
								
								
									
										4
									
								
								sh.h
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								sh.h
									
									
									
									
									
								
							| @@ -175,7 +175,7 @@ | ||||
| #endif | ||||
|  | ||||
| #ifdef EXTERN | ||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.832 2017/04/29 14:20:24 tg Exp $"); | ||||
| __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.833 2017/04/29 21:49:07 tg Exp $"); | ||||
| #endif | ||||
| #define MKSH_VERSION "R55 2017/04/27" | ||||
|  | ||||
| @@ -2528,7 +2528,7 @@ void change_xtrace(unsigned char, bool); | ||||
| int parse_args(const char **, int, bool *); | ||||
| int getn(const char *, int *); | ||||
| int gmatchx(const char *, const char *, bool); | ||||
| int has_globbing(const char *, const char *) MKSH_A_PURE; | ||||
| bool has_globbing(const char *) MKSH_A_PURE; | ||||
| int ascstrcmp(const void *, const void *) MKSH_A_PURE; | ||||
| int ascpstrcmp(const void *, const void *) MKSH_A_PURE; | ||||
| void ksh_getopt_reset(Getopt *, int); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user