reimplement has_globbing() with proper bracket expression parsing,

and already anticipate the parsing of collating symbols, equivalence
classes and character classes (heck, my first draft of this already did
better than GNU bash; as for ksh93, I still don’t grok its code at all)
This commit is contained in:
tg 2017-04-29 21:49:07 +00:00
parent bcf3130230
commit f2de19c41f
4 changed files with 94 additions and 69 deletions

39
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.785 2017/04/29 15:18:25 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.786 2017/04/29 21:49:04 tg Exp $
# -*- mode: sh -*-
#-
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@ -1962,15 +1962,11 @@ expected-stdout:
name: eglob-bad-1
description:
Check that globbing isn't done when glob has syntax error
file-setup: file 644 "abcx"
file-setup: file 644 "abcz"
file-setup: file 644 "bbc"
file-setup: file 644 "@(a[b|)c]foo"
stdin:
echo !([*)*
echo +(a|b[)*
echo @(a[b|)c]*
expected-stdout:
!([*)*
+(a|b[)*
@(a[b|)c]*
---
name: eglob-bad-2
description:
@ -2057,9 +2053,11 @@ stdin:
case foo in *(a|b[)) echo yes;; *) echo no;; esac
case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
expected-stdout:
no
yes
no
yes
---
name: eglob-trim-1
@ -2354,7 +2352,7 @@ expected-stdout:
---
name: glob-bad-1
description:
Check that globbing isn't done when glob has syntax error
Check that [ matches itself if it's not a valid bracket expr
file-setup: dir 755 "[x"
file-setup: file 644 "[x/foo"
stdin:
@ -2362,8 +2360,8 @@ stdin:
echo *[x
echo [x/*
expected-stdout:
[*
*[x
[x
[x
[x/foo
---
name: glob-bad-2
@ -2403,24 +2401,31 @@ file-setup: file 644 "abc"
file-setup: file 644 "bbc"
file-setup: file 644 "cbc"
file-setup: file 644 "-bc"
file-setup: file 644 "!bc"
file-setup: file 644 "^bc"
file-setup: file 644 "+bc"
file-setup: file 644 ",bc"
file-setup: file 644 "0bc"
file-setup: file 644 "1bc"
stdin:
echo [ab-]*
echo [-ab]*
echo [!-ab]*
echo [!ab]*
echo []ab]*
:>'./!bc'
:>'./^bc'
echo [^ab]*
echo [!ab]*
echo [+--]*
echo [--1]*
expected-stdout:
-bc abc bbc
-bc abc bbc
cbc
-bc cbc
!bc +bc ,bc 0bc 1bc ^bc cbc
!bc +bc ,bc -bc 0bc 1bc ^bc cbc
abc bbc
^bc abc bbc
!bc -bc ^bc cbc
+bc ,bc -bc
-bc 0bc 1bc
---
name: glob-range-2
description:

4
eval.c
View File

@ -23,7 +23,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.207 2017/04/28 00:38:29 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.208 2017/04/29 21:49:06 tg Exp $");
/*
* string expansion
@ -1666,7 +1666,7 @@ globit(XString *xs, /* dest string */
* directory isn't readable - if no globbing is needed, only execute
* permission should be required (as per POSIX)).
*/
if (!has_globbing(sp, se)) {
if (!has_globbing(sp)) {
XcheckN(*xs, xp, se - sp + 1);
debunk(xp, sp, Xnleft(*xs, xp));
xp += strlen(xp);

116
misc.c
View File

@ -32,7 +32,7 @@
#include <grp.h>
#endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.270 2017/04/28 21:43:30 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.271 2017/04/29 21:49:07 tg Exp $");
#define KSH_CHVT_FLAG
#ifdef MKSH_SMALL
@ -634,7 +634,7 @@ gmatchx(const char *s, const char *p, bool isfile)
* isfile is false iff no syntax check has been done on
* the pattern. If check fails, just do a strcmp().
*/
if (!isfile && !has_globbing(p, pe)) {
if (!isfile && !has_globbing(p)) {
size_t len = pe - p + 1;
char tbuf[64];
char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
@ -661,7 +661,7 @@ gmatchx(const char *s, const char *p, bool isfile)
* Syntax errors are:
* - [ with no closing ]
* - imbalanced $(...) expression
* - [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d))
* - [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
*/
/*XXX
* - if no magic,
@ -672,60 +672,80 @@ gmatchx(const char *s, const char *p, bool isfile)
* return ?
* - return ?
*/
int
has_globbing(const char *xp, const char *xpe)
bool
has_globbing(const char *pat)
{
const unsigned char *p = (const unsigned char *) xp;
const unsigned char *pe = (const unsigned char *) xpe;
int c;
int nest = 0, bnest = 0;
unsigned char c, subc;
bool saw_glob = false;
/* inside [...] */
bool in_bracket = false;
unsigned int nest = 0;
const unsigned char *p = (const unsigned char *)pat;
const unsigned char *s;
for (; p < pe; p++) {
if (!ISMAGIC(*p))
while ((c = *p++)) {
/* regular character? ok. */
if (!ISMAGIC(c))
continue;
if ((c = *++p) == '*' || c == '?')
/* MAGIC + NUL? abort. */
if (!(c = *p++))
return (false);
/* some specials */
if (c == '*' || c == '?') {
/* easy glob, accept */
saw_glob = true;
else if (c == '[') {
if (!in_bracket) {
saw_glob = true;
in_bracket = true;
if (ISMAGIC(p[1]) && p[2] == '!')
p += 2;
if (ISMAGIC(p[1]) && p[2] == ']')
p += 2;
}
/*XXX Do we need to check ranges here? POSIX Q */
} else if (c == ']') {
if (in_bracket) {
if (bnest)
/* [a*(b]) */
return (0);
in_bracket = false;
} else if (c == '[') {
/* bracket expression; eat negation and initial ] */
if (ISMAGIC(p[0]) && p[1] == '!')
p += 2;
if (ISMAGIC(p[0]) && p[1] == ']')
p += 2;
/* check next string part */
s = p;
while ((c = *s++)) {
/* regular chars are ok */
if (!ISMAGIC(c))
continue;
/* MAGIC + NUL cannot happen */
if (!(c = *s++))
return (false);
/* terminating bracket? */
if (c == ']') {
/* accept and continue */
p = s;
saw_glob = true;
break;
}
/* collating, equivalence or character class */
if (c == '[' && (
*s == '.' || *s == '=' || *s == ':')) {
/* must stop with exactly the same c */
subc = *s++;
/* arbitrarily many chars in betwixt */
while ((c = *s++))
/* but only this sequence... */
if (c == subc && ISMAGIC(*s) &&
s[1] == ']') {
/* accept, terminate */
s += 2;
break;
}
/* EOS without: reject bracket expr */
if (!c)
break;
/* continue; */
}
/* anything else just goes on */
}
} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
/* opening pattern */
saw_glob = true;
if (in_bracket)
bnest++;
else
nest++;
} else if (c == '|') {
if (in_bracket && !bnest)
/* *(a[foo|bar]) */
return (0);
} else if (c == /*(*/ ')') {
if (in_bracket) {
if (!bnest--)
/* *(a[b)c] */
return (0);
} else if (nest)
nest--;
++nest;
} else if (c == /*(*/')') {
/* closing pattern */
if (nest)
--nest;
}
/* else must be MAGIC followed by MAGIC or one of: ]{},!- */
}
return (saw_glob && !in_bracket && !nest);
return (saw_glob && !nest);
}
/* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
@ -774,7 +794,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
return (0);
/**
* [*+?@!](pattern|pattern|..)
* [+*?@!](pattern|pattern|..)
* This is also needed for ${..%..}, etc.
*/

4
sh.h
View File

@ -175,7 +175,7 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.832 2017/04/29 14:20:24 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.833 2017/04/29 21:49:07 tg Exp $");
#endif
#define MKSH_VERSION "R55 2017/04/27"
@ -2528,7 +2528,7 @@ void change_xtrace(unsigned char, bool);
int parse_args(const char **, int, bool *);
int getn(const char *, int *);
int gmatchx(const char *, const char *, bool);
int has_globbing(const char *, const char *) MKSH_A_PURE;
bool has_globbing(const char *) MKSH_A_PURE;
int ascstrcmp(const void *, const void *) MKSH_A_PURE;
int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
void ksh_getopt_reset(Getopt *, int);