reimplement has_globbing() with proper bracket expression parsing,
and already anticipate the parsing of collating symbols, equivalence classes and character classes (heck, my first draft of this already did better than GNU bash; as for ksh93, I still don’t grok its code at all)
This commit is contained in:
parent
bcf3130230
commit
f2de19c41f
39
check.t
39
check.t
@ -1,4 +1,4 @@
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.785 2017/04/29 15:18:25 tg Exp $
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.786 2017/04/29 21:49:04 tg Exp $
|
||||
# -*- mode: sh -*-
|
||||
#-
|
||||
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||
@ -1962,15 +1962,11 @@ expected-stdout:
|
||||
name: eglob-bad-1
|
||||
description:
|
||||
Check that globbing isn't done when glob has syntax error
|
||||
file-setup: file 644 "abcx"
|
||||
file-setup: file 644 "abcz"
|
||||
file-setup: file 644 "bbc"
|
||||
file-setup: file 644 "@(a[b|)c]foo"
|
||||
stdin:
|
||||
echo !([*)*
|
||||
echo +(a|b[)*
|
||||
echo @(a[b|)c]*
|
||||
expected-stdout:
|
||||
!([*)*
|
||||
+(a|b[)*
|
||||
@(a[b|)c]*
|
||||
---
|
||||
name: eglob-bad-2
|
||||
description:
|
||||
@ -2057,9 +2053,11 @@ stdin:
|
||||
case foo in *(a|b[)) echo yes;; *) echo no;; esac
|
||||
case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
|
||||
case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
|
||||
case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
|
||||
expected-stdout:
|
||||
no
|
||||
yes
|
||||
no
|
||||
yes
|
||||
---
|
||||
name: eglob-trim-1
|
||||
@ -2354,7 +2352,7 @@ expected-stdout:
|
||||
---
|
||||
name: glob-bad-1
|
||||
description:
|
||||
Check that globbing isn't done when glob has syntax error
|
||||
Check that [ matches itself if it's not a valid bracket expr
|
||||
file-setup: dir 755 "[x"
|
||||
file-setup: file 644 "[x/foo"
|
||||
stdin:
|
||||
@ -2362,8 +2360,8 @@ stdin:
|
||||
echo *[x
|
||||
echo [x/*
|
||||
expected-stdout:
|
||||
[*
|
||||
*[x
|
||||
[x
|
||||
[x
|
||||
[x/foo
|
||||
---
|
||||
name: glob-bad-2
|
||||
@ -2403,24 +2401,31 @@ file-setup: file 644 "abc"
|
||||
file-setup: file 644 "bbc"
|
||||
file-setup: file 644 "cbc"
|
||||
file-setup: file 644 "-bc"
|
||||
file-setup: file 644 "!bc"
|
||||
file-setup: file 644 "^bc"
|
||||
file-setup: file 644 "+bc"
|
||||
file-setup: file 644 ",bc"
|
||||
file-setup: file 644 "0bc"
|
||||
file-setup: file 644 "1bc"
|
||||
stdin:
|
||||
echo [ab-]*
|
||||
echo [-ab]*
|
||||
echo [!-ab]*
|
||||
echo [!ab]*
|
||||
echo []ab]*
|
||||
:>'./!bc'
|
||||
:>'./^bc'
|
||||
echo [^ab]*
|
||||
echo [!ab]*
|
||||
echo [+--]*
|
||||
echo [--1]*
|
||||
|
||||
expected-stdout:
|
||||
-bc abc bbc
|
||||
-bc abc bbc
|
||||
cbc
|
||||
-bc cbc
|
||||
!bc +bc ,bc 0bc 1bc ^bc cbc
|
||||
!bc +bc ,bc -bc 0bc 1bc ^bc cbc
|
||||
abc bbc
|
||||
^bc abc bbc
|
||||
!bc -bc ^bc cbc
|
||||
+bc ,bc -bc
|
||||
-bc 0bc 1bc
|
||||
---
|
||||
name: glob-range-2
|
||||
description:
|
||||
|
4
eval.c
4
eval.c
@ -23,7 +23,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.207 2017/04/28 00:38:29 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.208 2017/04/29 21:49:06 tg Exp $");
|
||||
|
||||
/*
|
||||
* string expansion
|
||||
@ -1666,7 +1666,7 @@ globit(XString *xs, /* dest string */
|
||||
* directory isn't readable - if no globbing is needed, only execute
|
||||
* permission should be required (as per POSIX)).
|
||||
*/
|
||||
if (!has_globbing(sp, se)) {
|
||||
if (!has_globbing(sp)) {
|
||||
XcheckN(*xs, xp, se - sp + 1);
|
||||
debunk(xp, sp, Xnleft(*xs, xp));
|
||||
xp += strlen(xp);
|
||||
|
116
misc.c
116
misc.c
@ -32,7 +32,7 @@
|
||||
#include <grp.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.270 2017/04/28 21:43:30 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.271 2017/04/29 21:49:07 tg Exp $");
|
||||
|
||||
#define KSH_CHVT_FLAG
|
||||
#ifdef MKSH_SMALL
|
||||
@ -634,7 +634,7 @@ gmatchx(const char *s, const char *p, bool isfile)
|
||||
* isfile is false iff no syntax check has been done on
|
||||
* the pattern. If check fails, just do a strcmp().
|
||||
*/
|
||||
if (!isfile && !has_globbing(p, pe)) {
|
||||
if (!isfile && !has_globbing(p)) {
|
||||
size_t len = pe - p + 1;
|
||||
char tbuf[64];
|
||||
char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
|
||||
@ -661,7 +661,7 @@ gmatchx(const char *s, const char *p, bool isfile)
|
||||
* Syntax errors are:
|
||||
* - [ with no closing ]
|
||||
* - imbalanced $(...) expression
|
||||
* - [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d))
|
||||
* - [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
|
||||
*/
|
||||
/*XXX
|
||||
* - if no magic,
|
||||
@ -672,60 +672,80 @@ gmatchx(const char *s, const char *p, bool isfile)
|
||||
* return ?
|
||||
* - return ?
|
||||
*/
|
||||
int
|
||||
has_globbing(const char *xp, const char *xpe)
|
||||
bool
|
||||
has_globbing(const char *pat)
|
||||
{
|
||||
const unsigned char *p = (const unsigned char *) xp;
|
||||
const unsigned char *pe = (const unsigned char *) xpe;
|
||||
int c;
|
||||
int nest = 0, bnest = 0;
|
||||
unsigned char c, subc;
|
||||
bool saw_glob = false;
|
||||
/* inside [...] */
|
||||
bool in_bracket = false;
|
||||
unsigned int nest = 0;
|
||||
const unsigned char *p = (const unsigned char *)pat;
|
||||
const unsigned char *s;
|
||||
|
||||
for (; p < pe; p++) {
|
||||
if (!ISMAGIC(*p))
|
||||
while ((c = *p++)) {
|
||||
/* regular character? ok. */
|
||||
if (!ISMAGIC(c))
|
||||
continue;
|
||||
if ((c = *++p) == '*' || c == '?')
|
||||
/* MAGIC + NUL? abort. */
|
||||
if (!(c = *p++))
|
||||
return (false);
|
||||
/* some specials */
|
||||
if (c == '*' || c == '?') {
|
||||
/* easy glob, accept */
|
||||
saw_glob = true;
|
||||
else if (c == '[') {
|
||||
if (!in_bracket) {
|
||||
saw_glob = true;
|
||||
in_bracket = true;
|
||||
if (ISMAGIC(p[1]) && p[2] == '!')
|
||||
p += 2;
|
||||
if (ISMAGIC(p[1]) && p[2] == ']')
|
||||
p += 2;
|
||||
}
|
||||
/*XXX Do we need to check ranges here? POSIX Q */
|
||||
} else if (c == ']') {
|
||||
if (in_bracket) {
|
||||
if (bnest)
|
||||
/* [a*(b]) */
|
||||
return (0);
|
||||
in_bracket = false;
|
||||
} else if (c == '[') {
|
||||
/* bracket expression; eat negation and initial ] */
|
||||
if (ISMAGIC(p[0]) && p[1] == '!')
|
||||
p += 2;
|
||||
if (ISMAGIC(p[0]) && p[1] == ']')
|
||||
p += 2;
|
||||
/* check next string part */
|
||||
s = p;
|
||||
while ((c = *s++)) {
|
||||
/* regular chars are ok */
|
||||
if (!ISMAGIC(c))
|
||||
continue;
|
||||
/* MAGIC + NUL cannot happen */
|
||||
if (!(c = *s++))
|
||||
return (false);
|
||||
/* terminating bracket? */
|
||||
if (c == ']') {
|
||||
/* accept and continue */
|
||||
p = s;
|
||||
saw_glob = true;
|
||||
break;
|
||||
}
|
||||
/* collating, equivalence or character class */
|
||||
if (c == '[' && (
|
||||
*s == '.' || *s == '=' || *s == ':')) {
|
||||
/* must stop with exactly the same c */
|
||||
subc = *s++;
|
||||
/* arbitrarily many chars in betwixt */
|
||||
while ((c = *s++))
|
||||
/* but only this sequence... */
|
||||
if (c == subc && ISMAGIC(*s) &&
|
||||
s[1] == ']') {
|
||||
/* accept, terminate */
|
||||
s += 2;
|
||||
break;
|
||||
}
|
||||
/* EOS without: reject bracket expr */
|
||||
if (!c)
|
||||
break;
|
||||
/* continue; */
|
||||
}
|
||||
/* anything else just goes on */
|
||||
}
|
||||
} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
|
||||
/* opening pattern */
|
||||
saw_glob = true;
|
||||
if (in_bracket)
|
||||
bnest++;
|
||||
else
|
||||
nest++;
|
||||
} else if (c == '|') {
|
||||
if (in_bracket && !bnest)
|
||||
/* *(a[foo|bar]) */
|
||||
return (0);
|
||||
} else if (c == /*(*/ ')') {
|
||||
if (in_bracket) {
|
||||
if (!bnest--)
|
||||
/* *(a[b)c] */
|
||||
return (0);
|
||||
} else if (nest)
|
||||
nest--;
|
||||
++nest;
|
||||
} else if (c == /*(*/')') {
|
||||
/* closing pattern */
|
||||
if (nest)
|
||||
--nest;
|
||||
}
|
||||
/* else must be MAGIC followed by MAGIC or one of: ]{},!- */
|
||||
}
|
||||
return (saw_glob && !in_bracket && !nest);
|
||||
return (saw_glob && !nest);
|
||||
}
|
||||
|
||||
/* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
|
||||
@ -774,7 +794,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
|
||||
return (0);
|
||||
|
||||
/**
|
||||
* [*+?@!](pattern|pattern|..)
|
||||
* [+*?@!](pattern|pattern|..)
|
||||
* This is also needed for ${..%..}, etc.
|
||||
*/
|
||||
|
||||
|
4
sh.h
4
sh.h
@ -175,7 +175,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.832 2017/04/29 14:20:24 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.833 2017/04/29 21:49:07 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R55 2017/04/27"
|
||||
|
||||
@ -2528,7 +2528,7 @@ void change_xtrace(unsigned char, bool);
|
||||
int parse_args(const char **, int, bool *);
|
||||
int getn(const char *, int *);
|
||||
int gmatchx(const char *, const char *, bool);
|
||||
int has_globbing(const char *, const char *) MKSH_A_PURE;
|
||||
bool has_globbing(const char *) MKSH_A_PURE;
|
||||
int ascstrcmp(const void *, const void *) MKSH_A_PURE;
|
||||
int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
|
||||
void ksh_getopt_reset(Getopt *, int);
|
||||
|
Loading…
x
Reference in New Issue
Block a user