Reimplement has_globbing() with proper bracket expression parsing,
and already look ahead to parse collating symbols, equivalence classes and character classes (heck, my first draft of this already did better than GNU bash; as for ksh93, I still don’t grok its code at all).
This commit is contained in:
parent
bcf3130230
commit
f2de19c41f
39
check.t
39
check.t
@ -1,4 +1,4 @@
|
|||||||
# $MirOS: src/bin/mksh/check.t,v 1.785 2017/04/29 15:18:25 tg Exp $
|
# $MirOS: src/bin/mksh/check.t,v 1.786 2017/04/29 21:49:04 tg Exp $
|
||||||
# -*- mode: sh -*-
|
# -*- mode: sh -*-
|
||||||
#-
|
#-
|
||||||
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||||
@ -1962,15 +1962,11 @@ expected-stdout:
|
|||||||
name: eglob-bad-1
|
name: eglob-bad-1
|
||||||
description:
|
description:
|
||||||
Check that globbing isn't done when glob has syntax error
|
Check that globbing isn't done when glob has syntax error
|
||||||
file-setup: file 644 "abcx"
|
file-setup: file 644 "@(a[b|)c]foo"
|
||||||
file-setup: file 644 "abcz"
|
|
||||||
file-setup: file 644 "bbc"
|
|
||||||
stdin:
|
stdin:
|
||||||
echo !([*)*
|
echo @(a[b|)c]*
|
||||||
echo +(a|b[)*
|
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
!([*)*
|
@(a[b|)c]*
|
||||||
+(a|b[)*
|
|
||||||
---
|
---
|
||||||
name: eglob-bad-2
|
name: eglob-bad-2
|
||||||
description:
|
description:
|
||||||
@ -2057,9 +2053,11 @@ stdin:
|
|||||||
case foo in *(a|b[)) echo yes;; *) echo no;; esac
|
case foo in *(a|b[)) echo yes;; *) echo no;; esac
|
||||||
case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
|
case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
|
||||||
case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
|
case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
|
||||||
|
case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
no
|
no
|
||||||
yes
|
yes
|
||||||
|
no
|
||||||
yes
|
yes
|
||||||
---
|
---
|
||||||
name: eglob-trim-1
|
name: eglob-trim-1
|
||||||
@ -2354,7 +2352,7 @@ expected-stdout:
|
|||||||
---
|
---
|
||||||
name: glob-bad-1
|
name: glob-bad-1
|
||||||
description:
|
description:
|
||||||
Check that globbing isn't done when glob has syntax error
|
Check that [ matches itself if it's not a valid bracket expr
|
||||||
file-setup: dir 755 "[x"
|
file-setup: dir 755 "[x"
|
||||||
file-setup: file 644 "[x/foo"
|
file-setup: file 644 "[x/foo"
|
||||||
stdin:
|
stdin:
|
||||||
@ -2362,8 +2360,8 @@ stdin:
|
|||||||
echo *[x
|
echo *[x
|
||||||
echo [x/*
|
echo [x/*
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
[*
|
[x
|
||||||
*[x
|
[x
|
||||||
[x/foo
|
[x/foo
|
||||||
---
|
---
|
||||||
name: glob-bad-2
|
name: glob-bad-2
|
||||||
@ -2403,24 +2401,31 @@ file-setup: file 644 "abc"
|
|||||||
file-setup: file 644 "bbc"
|
file-setup: file 644 "bbc"
|
||||||
file-setup: file 644 "cbc"
|
file-setup: file 644 "cbc"
|
||||||
file-setup: file 644 "-bc"
|
file-setup: file 644 "-bc"
|
||||||
|
file-setup: file 644 "!bc"
|
||||||
|
file-setup: file 644 "^bc"
|
||||||
|
file-setup: file 644 "+bc"
|
||||||
|
file-setup: file 644 ",bc"
|
||||||
|
file-setup: file 644 "0bc"
|
||||||
|
file-setup: file 644 "1bc"
|
||||||
stdin:
|
stdin:
|
||||||
echo [ab-]*
|
echo [ab-]*
|
||||||
echo [-ab]*
|
echo [-ab]*
|
||||||
echo [!-ab]*
|
echo [!-ab]*
|
||||||
echo [!ab]*
|
echo [!ab]*
|
||||||
echo []ab]*
|
echo []ab]*
|
||||||
:>'./!bc'
|
|
||||||
:>'./^bc'
|
|
||||||
echo [^ab]*
|
echo [^ab]*
|
||||||
echo [!ab]*
|
echo [+--]*
|
||||||
|
echo [--1]*
|
||||||
|
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
-bc abc bbc
|
-bc abc bbc
|
||||||
-bc abc bbc
|
-bc abc bbc
|
||||||
cbc
|
!bc +bc ,bc 0bc 1bc ^bc cbc
|
||||||
-bc cbc
|
!bc +bc ,bc -bc 0bc 1bc ^bc cbc
|
||||||
abc bbc
|
abc bbc
|
||||||
^bc abc bbc
|
^bc abc bbc
|
||||||
!bc -bc ^bc cbc
|
+bc ,bc -bc
|
||||||
|
-bc 0bc 1bc
|
||||||
---
|
---
|
||||||
name: glob-range-2
|
name: glob-range-2
|
||||||
description:
|
description:
|
||||||
|
4
eval.c
4
eval.c
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.207 2017/04/28 00:38:29 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.208 2017/04/29 21:49:06 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* string expansion
|
* string expansion
|
||||||
@ -1666,7 +1666,7 @@ globit(XString *xs, /* dest string */
|
|||||||
* directory isn't readable - if no globbing is needed, only execute
|
* directory isn't readable - if no globbing is needed, only execute
|
||||||
* permission should be required (as per POSIX)).
|
* permission should be required (as per POSIX)).
|
||||||
*/
|
*/
|
||||||
if (!has_globbing(sp, se)) {
|
if (!has_globbing(sp)) {
|
||||||
XcheckN(*xs, xp, se - sp + 1);
|
XcheckN(*xs, xp, se - sp + 1);
|
||||||
debunk(xp, sp, Xnleft(*xs, xp));
|
debunk(xp, sp, Xnleft(*xs, xp));
|
||||||
xp += strlen(xp);
|
xp += strlen(xp);
|
||||||
|
112
misc.c
112
misc.c
@ -32,7 +32,7 @@
|
|||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.270 2017/04/28 21:43:30 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.271 2017/04/29 21:49:07 tg Exp $");
|
||||||
|
|
||||||
#define KSH_CHVT_FLAG
|
#define KSH_CHVT_FLAG
|
||||||
#ifdef MKSH_SMALL
|
#ifdef MKSH_SMALL
|
||||||
@ -634,7 +634,7 @@ gmatchx(const char *s, const char *p, bool isfile)
|
|||||||
* isfile is false iff no syntax check has been done on
|
* isfile is false iff no syntax check has been done on
|
||||||
* the pattern. If check fails, just do a strcmp().
|
* the pattern. If check fails, just do a strcmp().
|
||||||
*/
|
*/
|
||||||
if (!isfile && !has_globbing(p, pe)) {
|
if (!isfile && !has_globbing(p)) {
|
||||||
size_t len = pe - p + 1;
|
size_t len = pe - p + 1;
|
||||||
char tbuf[64];
|
char tbuf[64];
|
||||||
char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
|
char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
|
||||||
@ -661,7 +661,7 @@ gmatchx(const char *s, const char *p, bool isfile)
|
|||||||
* Syntax errors are:
|
* Syntax errors are:
|
||||||
* - [ with no closing ]
|
* - [ with no closing ]
|
||||||
* - imbalanced $(...) expression
|
* - imbalanced $(...) expression
|
||||||
* - [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d))
|
* - [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
|
||||||
*/
|
*/
|
||||||
/*XXX
|
/*XXX
|
||||||
* - if no magic,
|
* - if no magic,
|
||||||
@ -672,60 +672,80 @@ gmatchx(const char *s, const char *p, bool isfile)
|
|||||||
* return ?
|
* return ?
|
||||||
* - return ?
|
* - return ?
|
||||||
*/
|
*/
|
||||||
int
|
bool
|
||||||
has_globbing(const char *xp, const char *xpe)
|
has_globbing(const char *pat)
|
||||||
{
|
{
|
||||||
const unsigned char *p = (const unsigned char *) xp;
|
unsigned char c, subc;
|
||||||
const unsigned char *pe = (const unsigned char *) xpe;
|
|
||||||
int c;
|
|
||||||
int nest = 0, bnest = 0;
|
|
||||||
bool saw_glob = false;
|
bool saw_glob = false;
|
||||||
/* inside [...] */
|
unsigned int nest = 0;
|
||||||
bool in_bracket = false;
|
const unsigned char *p = (const unsigned char *)pat;
|
||||||
|
const unsigned char *s;
|
||||||
|
|
||||||
for (; p < pe; p++) {
|
while ((c = *p++)) {
|
||||||
if (!ISMAGIC(*p))
|
/* regular character? ok. */
|
||||||
|
if (!ISMAGIC(c))
|
||||||
continue;
|
continue;
|
||||||
if ((c = *++p) == '*' || c == '?')
|
/* MAGIC + NUL? abort. */
|
||||||
|
if (!(c = *p++))
|
||||||
|
return (false);
|
||||||
|
/* some specials */
|
||||||
|
if (c == '*' || c == '?') {
|
||||||
|
/* easy glob, accept */
|
||||||
saw_glob = true;
|
saw_glob = true;
|
||||||
else if (c == '[') {
|
} else if (c == '[') {
|
||||||
if (!in_bracket) {
|
/* bracket expression; eat negation and initial ] */
|
||||||
|
if (ISMAGIC(p[0]) && p[1] == '!')
|
||||||
|
p += 2;
|
||||||
|
if (ISMAGIC(p[0]) && p[1] == ']')
|
||||||
|
p += 2;
|
||||||
|
/* check next string part */
|
||||||
|
s = p;
|
||||||
|
while ((c = *s++)) {
|
||||||
|
/* regular chars are ok */
|
||||||
|
if (!ISMAGIC(c))
|
||||||
|
continue;
|
||||||
|
/* MAGIC + NUL cannot happen */
|
||||||
|
if (!(c = *s++))
|
||||||
|
return (false);
|
||||||
|
/* terminating bracket? */
|
||||||
|
if (c == ']') {
|
||||||
|
/* accept and continue */
|
||||||
|
p = s;
|
||||||
saw_glob = true;
|
saw_glob = true;
|
||||||
in_bracket = true;
|
break;
|
||||||
if (ISMAGIC(p[1]) && p[2] == '!')
|
|
||||||
p += 2;
|
|
||||||
if (ISMAGIC(p[1]) && p[2] == ']')
|
|
||||||
p += 2;
|
|
||||||
}
|
}
|
||||||
/*XXX Do we need to check ranges here? POSIX Q */
|
/* collating, equivalence or character class */
|
||||||
} else if (c == ']') {
|
if (c == '[' && (
|
||||||
if (in_bracket) {
|
*s == '.' || *s == '=' || *s == ':')) {
|
||||||
if (bnest)
|
/* must stop with exactly the same c */
|
||||||
/* [a*(b]) */
|
subc = *s++;
|
||||||
return (0);
|
/* arbitrarily many chars in betwixt */
|
||||||
in_bracket = false;
|
while ((c = *s++))
|
||||||
|
/* but only this sequence... */
|
||||||
|
if (c == subc && ISMAGIC(*s) &&
|
||||||
|
s[1] == ']') {
|
||||||
|
/* accept, terminate */
|
||||||
|
s += 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* EOS without: reject bracket expr */
|
||||||
|
if (!c)
|
||||||
|
break;
|
||||||
|
/* continue; */
|
||||||
|
}
|
||||||
|
/* anything else just goes on */
|
||||||
}
|
}
|
||||||
} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
|
} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
|
||||||
|
/* opening pattern */
|
||||||
saw_glob = true;
|
saw_glob = true;
|
||||||
if (in_bracket)
|
++nest;
|
||||||
bnest++;
|
} else if (c == /*(*/')') {
|
||||||
else
|
/* closing pattern */
|
||||||
nest++;
|
if (nest)
|
||||||
} else if (c == '|') {
|
--nest;
|
||||||
if (in_bracket && !bnest)
|
|
||||||
/* *(a[foo|bar]) */
|
|
||||||
return (0);
|
|
||||||
} else if (c == /*(*/ ')') {
|
|
||||||
if (in_bracket) {
|
|
||||||
if (!bnest--)
|
|
||||||
/* *(a[b)c] */
|
|
||||||
return (0);
|
|
||||||
} else if (nest)
|
|
||||||
nest--;
|
|
||||||
}
|
}
|
||||||
/* else must be MAGIC followed by MAGIC or one of: ]{},!- */
|
|
||||||
}
|
}
|
||||||
return (saw_glob && !in_bracket && !nest);
|
return (saw_glob && !nest);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
|
/* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
|
||||||
@ -774,7 +794,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
|
|||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* [*+?@!](pattern|pattern|..)
|
* [+*?@!](pattern|pattern|..)
|
||||||
* This is also needed for ${..%..}, etc.
|
* This is also needed for ${..%..}, etc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
4
sh.h
4
sh.h
@ -175,7 +175,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EXTERN
|
#ifdef EXTERN
|
||||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.832 2017/04/29 14:20:24 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.833 2017/04/29 21:49:07 tg Exp $");
|
||||||
#endif
|
#endif
|
||||||
#define MKSH_VERSION "R55 2017/04/27"
|
#define MKSH_VERSION "R55 2017/04/27"
|
||||||
|
|
||||||
@ -2528,7 +2528,7 @@ void change_xtrace(unsigned char, bool);
|
|||||||
int parse_args(const char **, int, bool *);
|
int parse_args(const char **, int, bool *);
|
||||||
int getn(const char *, int *);
|
int getn(const char *, int *);
|
||||||
int gmatchx(const char *, const char *, bool);
|
int gmatchx(const char *, const char *, bool);
|
||||||
int has_globbing(const char *, const char *) MKSH_A_PURE;
|
bool has_globbing(const char *) MKSH_A_PURE;
|
||||||
int ascstrcmp(const void *, const void *) MKSH_A_PURE;
|
int ascstrcmp(const void *, const void *) MKSH_A_PURE;
|
||||||
int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
|
int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
|
||||||
void ksh_getopt_reset(Getopt *, int);
|
void ksh_getopt_reset(Getopt *, int);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user