Quite a big change: expanded variables are no longer scanned for
slashes, the ADELIM code gets more use, and a bug is fixed ☺
This commit is contained in:
tg 2008-03-01 21:10:26 +00:00
parent 16dd1d3c8f
commit 4912b5cf67
6 changed files with 233 additions and 173 deletions

44
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.154 2008/03/01 17:14:17 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.155 2008/03/01 21:10:25 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -1076,20 +1076,60 @@ expected-stdout:
5: 123
6: 123
---
name: eglob-substrpl-3
name: eglob-substrpl-3a
description:
Check substring replacement works with variables and slashes, too
stdin:
pfx=/home/user
wd=/home/user/tmp
echo ${wd/#$pfx/~}
echo ${wd/#\$pfx/~}
echo ${wd/#"$pfx"/~}
echo ${wd/#'$pfx'/~}
echo ${wd/#"\$pfx"/~}
echo ${wd/#'\$pfx'/~}
expected-stdout:
~/tmp
/home/user/tmp
~/tmp
/home/user/tmp
/home/user/tmp
/home/user/tmp
---
name: eglob-substrpl-3b
description:
More of this, bash fails it
stdin:
pfx=/home/user
wd=/home/user/tmp
echo ${wd/#$(echo /home/user)/~}
echo ${wd/#"$(echo /home/user)"/~}
echo ${wd/#'$(echo /home/user)'/~}
expected-stdout:
~/tmp
~/tmp
/home/user/tmp
---
name: eglob-substrpl-3c
description:
Even more weird cases
stdin:
pfx=/home/user
wd='$pfx/tmp'
echo ${wd/#$pfx/~}
echo ${wd/#\$pfx/~}
echo ${wd/#"$pfx"/~}
echo ${wd/#'$pfx'/~}
echo ${wd/#"\$pfx"/~}
echo ${wd/#'\$pfx'/~}
expected-stdout:
$pfx/tmp
~/tmp
$pfx/tmp
~/tmp
~/tmp
~/tmp
---
name: glob-bad-1
description:
Check that globbing isn't done when glob has syntax error

268
eval.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.42 2008/03/01 17:14:17 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.43 2008/03/01 21:10:25 tg Exp $");
#ifdef MKSH_SMALL
#define MKSH_NOPWNAM
@ -166,7 +166,6 @@ expand(const char *cp, /* input word */
int newlines = 0; /* For trailing newlines in COMSUB */
int saw_eq, tilde_ok;
int make_magic;
int sqmode = 0; /* keep backslashes before [\\/%#] */
size_t len;
if (cp == NULL)
@ -339,11 +338,11 @@ expand(const char *cp, /* input word */
end[-2] = EOS;
sp += end - beg - 1;
}
evaluate(substitute(stg = wdstrip(beg), 0),
evaluate(substitute(stg = wdstrip(beg, false, false), 0),
&from, KSH_UNWIND_ERROR, true);
afree(stg, ATEMP);
if (end) {
evaluate(stg = wdstrip(mid),
evaluate(stg = wdstrip(mid, false, false),
&num, KSH_UNWIND_ERROR, true);
afree(stg, ATEMP);
}
@ -361,6 +360,134 @@ expand(const char *cp, /* input word */
x.str = str_nsave(beg, num, ATEMP);
goto do_CSUBST;
}
case '/': {
char *s, *p, *d, *sbeg, *end;
char *pat, *rrep;
char *tpat0, *tpat1, *tpat2;
/* ! DOBLANK,DOBRACE_,DOTILDE */
f = DOPAT | (f&DONTRUNCOMMAND) |
DOTEMP_;
quote = 0;
s = wdcopy(sp, ATEMP);
p = s + (wdscan(sp, ADELIM) - sp);
d = s + (wdscan(sp, CSUBST) - sp);
#if 0
fprintf(stderr,
"D: s=%p 〈%s〉\n"
" p=%p 〈%s〉\n"
" d=%p 〈%s〉\n",
s, wdstrip(s, true, false),
p, wdstrip(p, true, false),
d, wdstrip(d, true, false));
fflush(stderr);
#endif
if (p >= d)
goto unwind_substsyn;
p[-2] = EOS;
if (p[-1] == /*{*/'}')
d = NULL;
else
d[-2] = EOS;
sp += (d ? d : p) - s - 1;
tpat0 = wdstrip(s, true, true);
pat = substitute(tpat0, 0);
rrep = d ? wdstrip(p, true, false) : null;
afree(s, ATEMP);
s = d = pat;
while (*s)
if (*s != '\\' ||
s[1] == '%' ||
s[1] == '#' ||
s[1] == '\0' ||
/* XXX really? */ s[1] == '\\' ||
s[1] == '/')
*d++ = *s++;
else
s++;
*d = '\0';
#if 0
fprintf(stderr,
"D: %s%s%s\n",
tpat0, pat, rrep);
fflush(stderr);
#endif
afree(tpat0, ATEMP);
/* reject empty pattern */
if (!*pat)
goto no_repl;
/* prepare string on which to work */
sbeg = s = str_save(str_val(st->var), ATEMP);
/* first see if we have any match at all */
tpat0 = pat;
if (*pat == '#') {
/* anchor at the beginning */
tpat1 = shf_smprintf("%s%c*", ++tpat0, MAGIC);
tpat2 = tpat1;
} else if (*pat == '%') {
/* anchor at the end */
tpat1 = shf_smprintf("%c*%s", MAGIC, ++tpat0);
tpat2 = tpat0;
} else {
/* float */
tpat1 = shf_smprintf("%c*%s%c*", MAGIC, pat, MAGIC);
tpat2 = tpat1 + 2;
}
again_repl:
#if 0
fprintf(stderr,
"D: 「%s」 ← %s%s\n",
s, tpat0, rrep);
fflush(stderr);
#endif
/* this would not be necessary if gmatchx would return
* the start and end values of a match found, like re*
*/
if (!gmatchx(sbeg, tpat1, false))
goto end_repl;
end = strnul(s);
/* now anchor the beginning of the match */
if (*pat != '#')
while (sbeg <= end) {
if (gmatchx(sbeg, tpat2, false))
break;
else
sbeg++;
}
/* now anchor the end of the match */
p = end;
if (*pat != '%')
while (p >= sbeg) {
bool gotmatch;
c = *p; *p = '\0';
gotmatch = gmatchx(sbeg, tpat0, false);
*p = c;
if (gotmatch)
break;
p--;
}
end = str_nsave(s, sbeg - s, ATEMP);
d = shf_smprintf("%s%s%s", end, rrep, p);
afree(end, ATEMP);
sbeg = d + (sbeg - s) + strlen(rrep);
afree(s, ATEMP);
s = d;
if (stype & 0x80)
goto again_repl;
end_repl:
afree(tpat1, ATEMP);
x.str = s;
no_repl:
afree(pat, ATEMP);
if (rrep != null)
afree(rrep, ATEMP);
goto do_CSUBST;
}
case '#':
case '%':
/* ! DOBLANK,DOBRACE_,DOTILDE */
@ -374,13 +501,6 @@ expand(const char *cp, /* input word */
*dp++ = MAGIC;
*dp++ = (char)('@' | 0x80);
break;
case '/':
/* ! DOBLANK,DOBRACE_,DOTILDE */
f = DOPAT | (f&DONTRUNCOMMAND) |
DOTEMP_;
quote = 0;
sqmode = 2;
break;
case '=':
/* Enabling tilde expansion
* after :s here is
@ -424,7 +544,6 @@ expand(const char *cp, /* input word */
tilde_ok = 0; /* in case of ${unset:-} */
*dp = '\0';
quote = st->quote;
sqmode = 0;
f = st->f;
if (f&DOBLANK)
doblank--;
@ -432,10 +551,7 @@ expand(const char *cp, /* input word */
case '#':
case '%':
/* Append end-pattern */
*dp++ = MAGIC; *dp++ = ')';
/* FALLTHROUGH */
case '/':
*dp = '\0';
*dp++ = MAGIC; *dp++ = ')'; *dp = '\0';
dp = Xrestpos(ds, dp, st->base);
/* Must use st->var since calling
* global would break things
@ -486,6 +602,7 @@ expand(const char *cp, /* input word */
(debunk(s, s, strlen(s) + 1), s));
}
case '0':
case '/':
dp = Xrestpos(ds, dp, st->base);
type = XSUB;
if (f&DOBLANK)
@ -600,19 +717,6 @@ expand(const char *cp, /* input word */
break;
}
if (sqmode) {
/* keep backslash before backslash or C_SUBOP2 char */
if ((c == '\\') ||
(quote && c == '/') ||
(quote && sqmode == 2 && ctype(c, C_SUBOP2)))
*dp++ = '\\';
if (sqmode == 2 && (quote || c != '/'))
/* beginning of string, ign. leading chars */
sqmode = 1;
else if (!quote && c == '/')
sqmode = 0;
}
/* check for end of word or IFS separation */
if (c == 0 || (!quote && (f & DOBLANK) && doblank &&
!make_magic && ctype(c, C_IFS))) {
@ -824,7 +928,14 @@ varsub(Expand *xp, const char *sp, const char *word,
stype = 0x80;
c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
}
if (stype == 0x80 && (c == ' ' || c == '0')) {
if (!stype && c == '/') {
slen += 2;
stype = c;
if (word[slen] == ADELIM) {
slen += 2;
stype |= 0x80;
}
} else if (stype == 0x80 && (c == ' ' || c == '0')) {
stype |= '0';
} else if (ctype(c, C_SUBOP1)) {
slen += 2;
@ -848,7 +959,6 @@ varsub(Expand *xp, const char *sp, const char *word,
switch (stype & 0x7f) {
case '=': /* can't assign to a vector */
case '%': /* can't trim a vector (yet) */
case '/':
case '#':
return -1;
}
@ -871,7 +981,6 @@ varsub(Expand *xp, const char *sp, const char *word,
case '=': /* can't assign to a vector */
case '%': /* can't trim a vector (yet) */
case '#':
case '/':
case '?':
return -1;
}
@ -906,7 +1015,7 @@ varsub(Expand *xp, const char *sp, const char *word,
c = stype&0x7f;
/* test the compiler's code generator */
if (ctype(c, C_SUBOP2) || stype == (0x80 | '0') ||
if (ctype(c, C_SUBOP2) || stype == (0x80 | '0') || c == '/' ||
(((stype&0x80) ? *xp->str=='\0' : xp->str==null) ? /* undef? */
c == '=' || c == '-' || c == '?' : c == '+'))
state = XBASE; /* expand word instead of variable value */
@ -1011,99 +1120,6 @@ trimsub(char *str, char *pat, int how)
return str_nsave(str, p - str, ATEMP);
}
break;
case '/': /* replace once - SLOW! */
case '/'|0x80: /* replace all - SLOWER! */
{
char *rpat, *rrep, *tpat1, *tpat2, *tpat0, *sbeg, *s, *d;
/* separate search pattern and replacement string */
s = d = rpat = str_save(pat, ATEMP);
rrep = null;
while ((c = *s++))
if (c == '\\') {
if (s[0] == '\\' && s[1] != '/')
++s;
if (!(*d++ = *s++))
break;
} else if (c == '/') {
rrep = s;
break;
} else
*d++ = c;
*d++ = '\0';
/* do not accept empty pattern */
if (!*rpat) {
afree(rpat, ATEMP);
return (str);
}
/* prepare string on which to work */
sbeg = s = str;
/* first see if we have any match at all */
tpat0 = rpat;
d = pat;
if (*d == '\\')
++d;
if (*d == '#') {
/* anchor at the beginning */
tpat1 = shf_smprintf("%s%c*", ++tpat0, MAGIC);
tpat2 = tpat1;
} else if (*d == '%') {
/* anchor at the end */
tpat1 = shf_smprintf("%c*%s", MAGIC, ++tpat0);
tpat2 = tpat0;
} else {
/* float */
tpat1 = shf_smprintf("%c*%s%c*", MAGIC, rpat, MAGIC);
tpat2 = tpat1 + 2;
}
#if 0
fprintf(stderr, "D: 「%s」 → %s%s\n", pat, tpat0, rrep);
fflush(stderr);
#endif
again_repl:
/* this would not be necessary if gmatchx would return
* the start and end values of a match found, like re*
*/
if (!gmatchx(sbeg, tpat1, false))
goto end_repl;
/* now anchor the beginning of the match */
if (*pat != '#')
while (sbeg <= end) {
if (gmatchx(sbeg, tpat2, false))
break;
else
sbeg++;
}
/* now anchor the end of the match */
p = end;
if (*pat != '%')
while (p >= sbeg) {
bool gotmatch;
c = *p; *p = '\0';
gotmatch = gmatchx(sbeg, tpat0, false);
*p = c;
if (gotmatch)
break;
p--;
}
end = str_nsave(s, sbeg - s, ATEMP);
d = shf_smprintf("%s%s%s", end, rrep, p);
afree(end, ATEMP);
sbeg = d + (sbeg - s) + strlen(rrep);
if (s != str)
afree(s, ATEMP);
s = d;
end = strnul(s);
if ((how & 0xFF) != '/')
goto again_repl;
end_repl:
afree(rpat, ATEMP);
afree(tpat1, ATEMP);
return (s);
}
}
return str; /* no match, return string */

51
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.53 2008/02/27 01:00:09 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.54 2008/03/01 21:10:25 tg Exp $");
/*
* states while lexing word
@ -211,41 +211,12 @@ yylex(int cf)
statep->ls_sadelim.nparen--;
else if (statep->ls_sadelim.nparen == 0 &&
(c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
#ifdef notyet
if (statep->ls_sadelim.style == SADELIM_MAKE &&
statep->ls_sadelim.num == 1) {
if (c == /*{*/'}')
yyerror("%s: expected '%c' %s\n",
T_synerr,
statep->ls_sadelim.delimiter,
/*{*/ "before '}'");
else {
*wp++ = ADELIM;
*wp++ = c; /* .delimiter */
while ((c = getsc()) != /*{*/ '}') {
if (!c) {
yyerror("%s: expected '%c' %s\n",
T_synerr,
/*{*/ '}', "at end of input");
} else if (strchr(sadelim_flags[statep->ls_sadelim.flags], c)) {
*wp++ = CHAR;
*wp++ = c;
} else {
char Ttmp[15] = "instead of ' '";
Ttmp[12] = c;
yyerror("%s: expected '%c' %s\n",
T_synerr,
/*{*/ '}', Ttmp);
}
}
}
}
#endif /* SADELIM_MAKE */
*wp++ = ADELIM;
*wp++ = c;
if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
POP_STATE();
if (c == /*{*/ '}')
POP_STATE();
break;
}
/* FALLTHROUGH */
@ -369,6 +340,7 @@ yylex(int cf)
*wp++ = '0';
*wp++ = ADELIM;
*wp++ = ':';
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':';
@ -384,6 +356,7 @@ yylex(int cf)
*wp++ = ' ';
}
ungetsc(c);
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':';
@ -391,6 +364,20 @@ yylex(int cf)
statep->ls_sadelim.nparen = 0;
break;
}
} else if (c == '/') {
*wp++ = CHAR, *wp++ = c;
if ((c = getsc()) == '/') {
*wp++ = ADELIM;
*wp++ = c;
} else
ungetsc(c);
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = '/';
statep->ls_sadelim.num = 1;
statep->ls_sadelim.nparen = 0;
break;
}
/* If this is a trim operation,
* treat (,|,) specially in STBRACE.

9
sh.h
View File

@ -8,7 +8,7 @@
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.191 2008/03/01 02:21:38 tg Rel $"
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.192 2008/03/01 21:10:26 tg Exp $"
#define MKSH_VERSION "R33 2008/03/01"
#if HAVE_SYS_PARAM_H
@ -538,13 +538,12 @@ EXTERN int really_exit;
#define C_SUBOP1 BIT(5) /* "=-+?" */
#define C_QUOTE BIT(6) /* \n\t"#$&'()*;<>?[]\`| (needing quoting) */
#define C_IFS BIT(7) /* $IFS */
#define C_SUBOP2 BIT(8) /* "#%/" (magic, see below) */
#define C_SUBOP2 BIT(8) /* "#%" (magic, see below) */
extern unsigned char chtypes[];
#define ctype(c, t) !!( ((t) == C_SUBOP2) ? \
(((c) == '#' || (c) == '%' || \
(c) == '/') ? 1 : 0) : \
(((c) == '#' || (c) == '%') ? 1 : 0) : \
(chtypes[(unsigned char)(c)]&(t)) )
#define ksh_isalphx(c) ctype((c), C_ALPHA)
#define ksh_isalnux(c) ctype((c), C_ALPHA | C_DIGIT)
@ -1451,7 +1450,7 @@ char *snptreef(char *, int, const char *, ...);
struct op *tcopy(struct op *, Area *);
char *wdcopy(const char *, Area *);
const char *wdscan(const char *, int);
char *wdstrip(const char *);
char *wdstrip(const char *, bool, bool);
void tfree(struct op *, Area *);
/* var.c */
void newblock(void);

4
syn.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.18 2007/10/25 15:34:30 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.19 2008/03/01 21:10:26 tg Exp $");
struct nesting_state {
int start_token; /* token that began nesting (e.g., FOR) */
@ -551,7 +551,7 @@ function_body(char *name,
struct op *t;
int old_func_parse;
sname = wdstrip(name);
sname = wdstrip(name, false, false);
/* Check for valid characters in name. posix and ksh93 say only
* allow [a-zA-Z_0-9] but this allows more as old pdkshs have
* allowed more (the following were never allowed:

30
tree.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.14 2008/02/26 21:08:33 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.15 2008/03/01 21:10:26 tg Exp $");
#define INDENT 4
@ -528,7 +528,7 @@ wdscan(const char *wp, int c)
* (string is allocated from ATEMP)
*/
char *
wdstrip(const char *wp)
wdstrip(const char *wp, bool keepq, bool make_magic)
{
struct shf shf;
int c;
@ -538,7 +538,7 @@ wdstrip(const char *wp)
/* problems:
* `...` -> $(...)
* x${foo:-"hi"} -> x${foo:-hi}
* x${foo:-'hi'} -> x${foo:-hi}
* x${foo:-'hi'} -> x${foo:-hi} unless keepq
*/
while (1)
switch ((c = *wp++)) {
@ -546,8 +546,17 @@ wdstrip(const char *wp)
return shf_sclose(&shf); /* null terminates */
case ADELIM:
case CHAR:
c = *wp++;
if (make_magic && (ISMAGIC(c) || c == '[' || c == NOT ||
c == '-' || c == ']' || c == '*' || c == '?'))
shf_putchar(MAGIC, &shf);
shf_putchar(c, &shf);
break;
case QCHAR:
shf_putchar(*wp++, &shf);
c = *wp++;
if (keepq && (c == '"' || c == '`' || c == '$' || c == '\\'))
shf_putchar('\\', &shf);
shf_putchar(c, &shf);
break;
case COMSUB:
shf_puts("$(", &shf);
@ -577,13 +586,22 @@ wdstrip(const char *wp)
shf_putchar('}', &shf);
break;
case OPAT:
shf_putchar(*wp++, &shf);
shf_putchar('(', &shf);
if (make_magic) {
shf_putchar(MAGIC, &shf);
shf_putchar(*wp++ | 0x80, &shf);
} else {
shf_putchar(*wp++, &shf);
shf_putchar('(', &shf);
}
break;
case SPAT:
if (make_magic)
shf_putchar(MAGIC, &shf);
shf_putchar('|', &shf);
break;
case CPAT:
if (make_magic)
shf_putchar(MAGIC, &shf);
shf_putchar(')', &shf);
break;
}