first cut at the new matching code, IT WORKS!!! in the FIRST try!
missing: - tons of new testcases - EBCDIC support with ASCII ordering for POSIX ranges
This commit is contained in:
parent
515df6ab7c
commit
c03372e8a4
6
check.t
6
check.t
@ -1,4 +1,4 @@
|
|||||||
# $MirOS: src/bin/mksh/check.t,v 1.788 2017/05/03 16:17:08 tg Exp $
|
# $MirOS: src/bin/mksh/check.t,v 1.789 2017/05/03 17:48:06 tg Exp $
|
||||||
# -*- mode: sh -*-
|
# -*- mode: sh -*-
|
||||||
#-
|
#-
|
||||||
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
# Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
|
||||||
@ -2487,10 +2487,10 @@ file-setup: file 644 "-bc"
|
|||||||
file-setup: file 644 "@bc"
|
file-setup: file 644 "@bc"
|
||||||
stdin:
|
stdin:
|
||||||
echo [a-c-e]*
|
echo [a-c-e]*
|
||||||
#XXX TODO: echo [a--@]*
|
echo [a--@]*
|
||||||
# -> @bc
|
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
-bc abc bbc cbc ebc
|
-bc abc bbc cbc ebc
|
||||||
|
@bc
|
||||||
---
|
---
|
||||||
name: glob-word-1
|
name: glob-word-1
|
||||||
description:
|
description:
|
||||||
|
203
misc.c
203
misc.c
@ -32,7 +32,7 @@
|
|||||||
#include <grp.h>
|
#include <grp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.274 2017/05/01 19:44:29 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.275 2017/05/03 17:48:08 tg Exp $");
|
||||||
|
|
||||||
#define KSH_CHVT_FLAG
|
#define KSH_CHVT_FLAG
|
||||||
#ifdef MKSH_SMALL
|
#ifdef MKSH_SMALL
|
||||||
@ -952,14 +952,13 @@ static const struct cclass {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char *
|
static const unsigned char *
|
||||||
gmatch_cclass(const unsigned char *p, unsigned char sub)
|
|
||||||
#if 0
|
|
||||||
gmatch_cclass(const unsigned char *pat, unsigned char sc)
|
gmatch_cclass(const unsigned char *pat, unsigned char sc)
|
||||||
{
|
{
|
||||||
unsigned char c, subc;
|
unsigned char c, subc, lc;
|
||||||
const unsigned char *p = pat, *s;
|
const unsigned char *p = pat, *s;
|
||||||
bool found = false;
|
bool found = false;
|
||||||
bool negated = false;
|
bool negated = false;
|
||||||
|
char *subp;
|
||||||
|
|
||||||
/* check for negation */
|
/* check for negation */
|
||||||
if (ISMAGIC(p[0]) && ord(p[1]) == ord('!')) {
|
if (ISMAGIC(p[0]) && ord(p[1]) == ord('!')) {
|
||||||
@ -971,6 +970,7 @@ gmatch_cclass(const unsigned char *pat, unsigned char sc)
|
|||||||
++p;
|
++p;
|
||||||
/* iterate over bracket expression, debunk()ing on the fly */
|
/* iterate over bracket expression, debunk()ing on the fly */
|
||||||
while ((c = *p++)) {
|
while ((c = *p++)) {
|
||||||
|
nextc:
|
||||||
/* non-regular character? */
|
/* non-regular character? */
|
||||||
if (ISMAGIC(c)) {
|
if (ISMAGIC(c)) {
|
||||||
/* MAGIC + NUL cannot happen */
|
/* MAGIC + NUL cannot happen */
|
||||||
@ -993,51 +993,162 @@ gmatch_cclass(const unsigned char *pat, unsigned char sc)
|
|||||||
subc = *p++;
|
subc = *p++;
|
||||||
/* save away start of substring */
|
/* save away start of substring */
|
||||||
s = p;
|
s = p;
|
||||||
}
|
/* arbitrarily many chars in betwixt */
|
||||||
#endif
|
while ((c = *p++))
|
||||||
{
|
/* but only this sequence... */
|
||||||
unsigned char c, d;
|
if (c == subc && ISMAGIC(*p) &&
|
||||||
bool notp, found = false;
|
ord(p[1]) == ord(']')) {
|
||||||
const unsigned char *orig_p = p;
|
/* accept, terminate */
|
||||||
|
|
||||||
if ((notp = tobool(ISMAGIC(*p) && *++p == '!')))
|
|
||||||
p++;
|
|
||||||
do {
|
|
||||||
c = *p++;
|
|
||||||
if (ISMAGIC(c)) {
|
|
||||||
c = *p++;
|
|
||||||
if ((c & 0x80) && !ISMAGIC(c)) {
|
|
||||||
/* extended pattern matching: *+?@! */
|
|
||||||
c &= 0x7F;
|
|
||||||
/* XXX the ( char isn't handled as part of [] */
|
|
||||||
if (c == ' ')
|
|
||||||
/* simile for @: plain (..) */
|
|
||||||
c = '(' /*)*/;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (c == '\0')
|
|
||||||
/* No closing ] - act as if the opening [ was quoted */
|
|
||||||
return (sub == '[' ? orig_p : NULL);
|
|
||||||
if (ISMAGIC(p[0]) && p[1] == '-' &&
|
|
||||||
(!ISMAGIC(p[2]) || p[3] != ']')) {
|
|
||||||
/* MAGIC- */
|
|
||||||
p += 2;
|
p += 2;
|
||||||
d = *p++;
|
break;
|
||||||
if (ISMAGIC(d)) {
|
|
||||||
d = *p++;
|
|
||||||
if ((d & 0x80) && !ISMAGIC(d))
|
|
||||||
d &= 0x7f;
|
|
||||||
}
|
}
|
||||||
/* POSIX says this is an invalid expression */
|
/* EOS without: reject bracket expr */
|
||||||
if (c > d)
|
if (!c)
|
||||||
return (NULL);
|
break;
|
||||||
} else
|
/* debunk substring */
|
||||||
d = c;
|
strndupx(subp, s, p - s - 3, ATEMP);
|
||||||
if (c == sub || (c <= sub && sub <= d))
|
debunk(subp, subp, p - s - 3 + 1);
|
||||||
found = true;
|
cclass_common:
|
||||||
} while (!(ISMAGIC(p[0]) && p[1] == ']'));
|
/* whither subexpression */
|
||||||
|
if (ord(subc) == ord(':')) {
|
||||||
|
const struct cclass *cls = cclasses;
|
||||||
|
|
||||||
return ((found != notp) ? p+2 : NULL);
|
/* search for name in cclass list */
|
||||||
|
while (cls->name)
|
||||||
|
if (!strcmp(subp, cls->name)) {
|
||||||
|
/* found, match? */
|
||||||
|
if (ctype(sc,
|
||||||
|
cls->value))
|
||||||
|
found = true;
|
||||||
|
/* break either way */
|
||||||
|
break;
|
||||||
|
} else
|
||||||
|
++cls;
|
||||||
|
/* that's all here */
|
||||||
|
afree(subp, ATEMP);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* collating element or equivalence class */
|
||||||
|
/* Note: latter are treated as former */
|
||||||
|
if (ctype(subp[0], C_ASCII) && !subp[1])
|
||||||
|
/* [.a.] where a is one ASCII char */
|
||||||
|
c = subp[0];
|
||||||
|
else
|
||||||
|
/* force no match */
|
||||||
|
c = 0;
|
||||||
|
/* no longer needed */
|
||||||
|
afree(subp, ATEMP);
|
||||||
|
} else if (!ISMAGIC(c) && (c & 0x80)) {
|
||||||
|
/* 0x80|' ' is plain (...) */
|
||||||
|
if ((c &= 0x7F) != ' ') {
|
||||||
|
/* check single match NOW */
|
||||||
|
if (sc == c)
|
||||||
|
found = true;
|
||||||
|
/* next character is (...) */
|
||||||
|
}
|
||||||
|
c = '('/*)*/;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* range expression? */
|
||||||
|
if (!(ISMAGIC(p[0]) && ord(p[1]) == ord('-') &&
|
||||||
|
/* not terminating bracket? */
|
||||||
|
(!ISMAGIC(p[2]) || ord(p[3]) != ord(']')))) {
|
||||||
|
/* no, check single match */
|
||||||
|
if (sc == c)
|
||||||
|
/* note: sc is never NUL */
|
||||||
|
found = true;
|
||||||
|
/* do the next "first" character */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* save lower range bound */
|
||||||
|
lc = c;
|
||||||
|
/* skip over the range operator */
|
||||||
|
p += 2;
|
||||||
|
/* do the same shit as above... almost */
|
||||||
|
subc = 0;
|
||||||
|
if (!(c = *p++))
|
||||||
|
break;
|
||||||
|
/* non-regular character? */
|
||||||
|
if (ISMAGIC(c)) {
|
||||||
|
/* MAGIC + NUL cannot happen */
|
||||||
|
if (!(c = *p++))
|
||||||
|
break;
|
||||||
|
/* sub-bracket expressions */
|
||||||
|
if (ord(c) == ord('[') && (
|
||||||
|
/* collating element? */
|
||||||
|
ord(*p) == ord('.') ||
|
||||||
|
/* equivalence class? */
|
||||||
|
ord(*p) == ord('=') ||
|
||||||
|
/* character class? */
|
||||||
|
ord(*p) == ord(':'))) {
|
||||||
|
/* must stop with exactly the same c */
|
||||||
|
subc = *p++;
|
||||||
|
/* save away start of substring */
|
||||||
|
s = p;
|
||||||
|
/* arbitrarily many chars in betwixt */
|
||||||
|
while ((c = *p++))
|
||||||
|
/* but only this sequence... */
|
||||||
|
if (c == subc && ISMAGIC(*p) &&
|
||||||
|
ord(p[1]) == ord(']')) {
|
||||||
|
/* accept, terminate */
|
||||||
|
p += 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* EOS without: reject bracket expr */
|
||||||
|
if (!c)
|
||||||
|
break;
|
||||||
|
/* debunk substring */
|
||||||
|
strndupx(subp, s, p - s - 3, ATEMP);
|
||||||
|
debunk(subp, subp, p - s - 3 + 1);
|
||||||
|
/* whither subexpression */
|
||||||
|
if (ord(subc) == ord(':')) {
|
||||||
|
/* oops, not a range */
|
||||||
|
|
||||||
|
/* match single previous char */
|
||||||
|
if (lc && (sc == lc))
|
||||||
|
found = true;
|
||||||
|
/* match hyphen-minus */
|
||||||
|
if (ord(sc) == ord('-'))
|
||||||
|
found = true;
|
||||||
|
/* handle cclass common part */
|
||||||
|
goto cclass_common;
|
||||||
|
}
|
||||||
|
/* collating element or equivalence class */
|
||||||
|
/* Note: latter are treated as former */
|
||||||
|
if (ctype(subp[0], C_ASCII) && !subp[1])
|
||||||
|
/* [.a.] where a is one ASCII char */
|
||||||
|
c = subp[0];
|
||||||
|
else
|
||||||
|
/* force no match */
|
||||||
|
c = 0;
|
||||||
|
/* no longer needed */
|
||||||
|
afree(subp, ATEMP);
|
||||||
|
/* other meaning below */
|
||||||
|
subc = 0;
|
||||||
|
} else if (c == (0x80 | ' ')) {
|
||||||
|
/* 0x80|' ' is plain (...) */
|
||||||
|
c = '('/*)*/;
|
||||||
|
} else if (!ISMAGIC(c) && (c & 0x80)) {
|
||||||
|
c &= 0x7F;
|
||||||
|
subc = '('/*)*/;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* now do the actual range match check */
|
||||||
|
if (lc != 0 && /* c != 0 && */ lc <= sc && sc <= c)
|
||||||
|
found = true;
|
||||||
|
/* forced next character? */
|
||||||
|
if (subc) {
|
||||||
|
c = subc;
|
||||||
|
goto nextc;
|
||||||
|
}
|
||||||
|
/* otherwise, just go on with the pattern string */
|
||||||
|
}
|
||||||
|
/* if we broke here, the bracket expression was invalid */
|
||||||
|
if (ord(sc) == ord('['))
|
||||||
|
/* initial opening bracket as literal match */
|
||||||
|
return (pat);
|
||||||
|
/* or rather no match */
|
||||||
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Look for next ) or | (if match_sep) in *(foo|bar) pattern */
|
/* Look for next ) or | (if match_sep) in *(foo|bar) pattern */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user