reimplement has_globbing() with proper bracket expression parsing,

and already parse collating symbols, equivalence classes and character
classes ahead of time (heck, my first draft of this already did better
than GNU bash; as for ksh93, I still don’t grok its code at all)
This commit is contained in:
tg 2017-04-29 21:49:07 +00:00
parent bcf3130230
commit f2de19c41f
4 changed files with 94 additions and 69 deletions

39
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.785 2017/04/29 15:18:25 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.786 2017/04/29 21:49:04 tg Exp $
# -*- mode: sh -*- # -*- mode: sh -*-
#- #-
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@ -1962,15 +1962,11 @@ expected-stdout:
name: eglob-bad-1 name: eglob-bad-1
description: description:
Check that globbing isn't done when glob has syntax error Check that globbing isn't done when glob has syntax error
file-setup: file 644 "abcx" file-setup: file 644 "@(a[b|)c]foo"
file-setup: file 644 "abcz"
file-setup: file 644 "bbc"
stdin: stdin:
echo !([*)* echo @(a[b|)c]*
echo +(a|b[)*
expected-stdout: expected-stdout:
!([*)* @(a[b|)c]*
+(a|b[)*
--- ---
name: eglob-bad-2 name: eglob-bad-2
description: description:
@ -2057,9 +2053,11 @@ stdin:
case foo in *(a|b[)) echo yes;; *) echo no;; esac case foo in *(a|b[)) echo yes;; *) echo no;; esac
case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
expected-stdout: expected-stdout:
no no
yes yes
no
yes yes
--- ---
name: eglob-trim-1 name: eglob-trim-1
@ -2354,7 +2352,7 @@ expected-stdout:
--- ---
name: glob-bad-1 name: glob-bad-1
description: description:
Check that globbing isn't done when glob has syntax error Check that [ matches itself if it's not a valid bracket expr
file-setup: dir 755 "[x" file-setup: dir 755 "[x"
file-setup: file 644 "[x/foo" file-setup: file 644 "[x/foo"
stdin: stdin:
@ -2362,8 +2360,8 @@ stdin:
echo *[x echo *[x
echo [x/* echo [x/*
expected-stdout: expected-stdout:
[* [x
*[x [x
[x/foo [x/foo
--- ---
name: glob-bad-2 name: glob-bad-2
@ -2403,24 +2401,31 @@ file-setup: file 644 "abc"
file-setup: file 644 "bbc" file-setup: file 644 "bbc"
file-setup: file 644 "cbc" file-setup: file 644 "cbc"
file-setup: file 644 "-bc" file-setup: file 644 "-bc"
file-setup: file 644 "!bc"
file-setup: file 644 "^bc"
file-setup: file 644 "+bc"
file-setup: file 644 ",bc"
file-setup: file 644 "0bc"
file-setup: file 644 "1bc"
stdin: stdin:
echo [ab-]* echo [ab-]*
echo [-ab]* echo [-ab]*
echo [!-ab]* echo [!-ab]*
echo [!ab]* echo [!ab]*
echo []ab]* echo []ab]*
:>'./!bc'
:>'./^bc'
echo [^ab]* echo [^ab]*
echo [!ab]* echo [+--]*
echo [--1]*
expected-stdout: expected-stdout:
-bc abc bbc -bc abc bbc
-bc abc bbc -bc abc bbc
cbc !bc +bc ,bc 0bc 1bc ^bc cbc
-bc cbc !bc +bc ,bc -bc 0bc 1bc ^bc cbc
abc bbc abc bbc
^bc abc bbc ^bc abc bbc
!bc -bc ^bc cbc +bc ,bc -bc
-bc 0bc 1bc
--- ---
name: glob-range-2 name: glob-range-2
description: description:

4
eval.c
View File

@ -23,7 +23,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.207 2017/04/28 00:38:29 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.208 2017/04/29 21:49:06 tg Exp $");
/* /*
* string expansion * string expansion
@ -1666,7 +1666,7 @@ globit(XString *xs, /* dest string */
* directory isn't readable - if no globbing is needed, only execute * directory isn't readable - if no globbing is needed, only execute
* permission should be required (as per POSIX)). * permission should be required (as per POSIX)).
*/ */
if (!has_globbing(sp, se)) { if (!has_globbing(sp)) {
XcheckN(*xs, xp, se - sp + 1); XcheckN(*xs, xp, se - sp + 1);
debunk(xp, sp, Xnleft(*xs, xp)); debunk(xp, sp, Xnleft(*xs, xp));
xp += strlen(xp); xp += strlen(xp);

112
misc.c
View File

@ -32,7 +32,7 @@
#include <grp.h> #include <grp.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.270 2017/04/28 21:43:30 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.271 2017/04/29 21:49:07 tg Exp $");
#define KSH_CHVT_FLAG #define KSH_CHVT_FLAG
#ifdef MKSH_SMALL #ifdef MKSH_SMALL
@ -634,7 +634,7 @@ gmatchx(const char *s, const char *p, bool isfile)
* isfile is false iff no syntax check has been done on * isfile is false iff no syntax check has been done on
* the pattern. If check fails, just do a strcmp(). * the pattern. If check fails, just do a strcmp().
*/ */
if (!isfile && !has_globbing(p, pe)) { if (!isfile && !has_globbing(p)) {
size_t len = pe - p + 1; size_t len = pe - p + 1;
char tbuf[64]; char tbuf[64];
char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP); char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
@ -661,7 +661,7 @@ gmatchx(const char *s, const char *p, bool isfile)
* Syntax errors are: * Syntax errors are:
* - [ with no closing ] * - [ with no closing ]
* - imbalanced $(...) expression * - imbalanced $(...) expression
* - [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d)) * - [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
*/ */
/*XXX /*XXX
* - if no magic, * - if no magic,
@ -672,60 +672,80 @@ gmatchx(const char *s, const char *p, bool isfile)
* return ? * return ?
* - return ? * - return ?
*/ */
int bool
has_globbing(const char *xp, const char *xpe) has_globbing(const char *pat)
{ {
const unsigned char *p = (const unsigned char *) xp; unsigned char c, subc;
const unsigned char *pe = (const unsigned char *) xpe;
int c;
int nest = 0, bnest = 0;
bool saw_glob = false; bool saw_glob = false;
/* inside [...] */ unsigned int nest = 0;
bool in_bracket = false; const unsigned char *p = (const unsigned char *)pat;
const unsigned char *s;
for (; p < pe; p++) { while ((c = *p++)) {
if (!ISMAGIC(*p)) /* regular character? ok. */
if (!ISMAGIC(c))
continue; continue;
if ((c = *++p) == '*' || c == '?') /* MAGIC + NUL? abort. */
if (!(c = *p++))
return (false);
/* some specials */
if (c == '*' || c == '?') {
/* easy glob, accept */
saw_glob = true; saw_glob = true;
else if (c == '[') { } else if (c == '[') {
if (!in_bracket) { /* bracket expression; eat negation and initial ] */
if (ISMAGIC(p[0]) && p[1] == '!')
p += 2;
if (ISMAGIC(p[0]) && p[1] == ']')
p += 2;
/* check next string part */
s = p;
while ((c = *s++)) {
/* regular chars are ok */
if (!ISMAGIC(c))
continue;
/* MAGIC + NUL cannot happen */
if (!(c = *s++))
return (false);
/* terminating bracket? */
if (c == ']') {
/* accept and continue */
p = s;
saw_glob = true; saw_glob = true;
in_bracket = true; break;
if (ISMAGIC(p[1]) && p[2] == '!')
p += 2;
if (ISMAGIC(p[1]) && p[2] == ']')
p += 2;
} }
/*XXX Do we need to check ranges here? POSIX Q */ /* collating, equivalence or character class */
} else if (c == ']') { if (c == '[' && (
if (in_bracket) { *s == '.' || *s == '=' || *s == ':')) {
if (bnest) /* must stop with exactly the same c */
/* [a*(b]) */ subc = *s++;
return (0); /* arbitrarily many chars in betwixt */
in_bracket = false; while ((c = *s++))
/* but only this sequence... */
if (c == subc && ISMAGIC(*s) &&
s[1] == ']') {
/* accept, terminate */
s += 2;
break;
}
/* EOS without: reject bracket expr */
if (!c)
break;
/* continue; */
}
/* anything else just goes on */
} }
} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) { } else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
/* opening pattern */
saw_glob = true; saw_glob = true;
if (in_bracket) ++nest;
bnest++; } else if (c == /*(*/')') {
else /* closing pattern */
nest++; if (nest)
} else if (c == '|') { --nest;
if (in_bracket && !bnest)
/* *(a[foo|bar]) */
return (0);
} else if (c == /*(*/ ')') {
if (in_bracket) {
if (!bnest--)
/* *(a[b)c] */
return (0);
} else if (nest)
nest--;
} }
/* else must be MAGIC followed by MAGIC or one of: ]{},!- */
} }
return (saw_glob && !in_bracket && !nest); return (saw_glob && !nest);
} }
/* Function must return either 0 or 1 (assumed by code for 0x80|'!') */ /* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
@ -774,7 +794,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
return (0); return (0);
/** /**
* [*+?@!](pattern|pattern|..) * [+*?@!](pattern|pattern|..)
* This is also needed for ${..%..}, etc. * This is also needed for ${..%..}, etc.
*/ */

4
sh.h
View File

@ -175,7 +175,7 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.832 2017/04/29 14:20:24 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.833 2017/04/29 21:49:07 tg Exp $");
#endif #endif
#define MKSH_VERSION "R55 2017/04/27" #define MKSH_VERSION "R55 2017/04/27"
@ -2528,7 +2528,7 @@ void change_xtrace(unsigned char, bool);
int parse_args(const char **, int, bool *); int parse_args(const char **, int, bool *);
int getn(const char *, int *); int getn(const char *, int *);
int gmatchx(const char *, const char *, bool); int gmatchx(const char *, const char *, bool);
int has_globbing(const char *, const char *) MKSH_A_PURE; bool has_globbing(const char *) MKSH_A_PURE;
int ascstrcmp(const void *, const void *) MKSH_A_PURE; int ascstrcmp(const void *, const void *) MKSH_A_PURE;
int ascpstrcmp(const void *, const void *) MKSH_A_PURE; int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
void ksh_getopt_reset(Getopt *, int); void ksh_getopt_reset(Getopt *, int);