diff --git a/check.t b/check.t index 1feb5d3..e8fd687 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.154 2008/03/01 17:14:17 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.155 2008/03/01 21:10:25 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -1076,20 +1076,60 @@ expected-stdout: 5: 123 6: 123 --- -name: eglob-substrpl-3 +name: eglob-substrpl-3a description: Check substring replacement works with variables and slashes, too stdin: pfx=/home/user wd=/home/user/tmp echo ${wd/#$pfx/~} + echo ${wd/#\$pfx/~} echo ${wd/#"$pfx"/~} echo ${wd/#'$pfx'/~} + echo ${wd/#"\$pfx"/~} + echo ${wd/#'\$pfx'/~} +expected-stdout: + ~/tmp + /home/user/tmp + ~/tmp + /home/user/tmp + /home/user/tmp + /home/user/tmp +--- +name: eglob-substrpl-3b +description: + More of this, bash fails it +stdin: + pfx=/home/user + wd=/home/user/tmp + echo ${wd/#$(echo /home/user)/~} + echo ${wd/#"$(echo /home/user)"/~} + echo ${wd/#'$(echo /home/user)'/~} expected-stdout: ~/tmp ~/tmp /home/user/tmp --- +name: eglob-substrpl-3c +description: + Even more weird cases +stdin: + pfx=/home/user + wd='$pfx/tmp' + echo ${wd/#$pfx/~} + echo ${wd/#\$pfx/~} + echo ${wd/#"$pfx"/~} + echo ${wd/#'$pfx'/~} + echo ${wd/#"\$pfx"/~} + echo ${wd/#'\$pfx'/~} +expected-stdout: + $pfx/tmp + ~/tmp + $pfx/tmp + ~/tmp + ~/tmp + ~/tmp +--- name: glob-bad-1 description: Check that globbing isn't done when glob has syntax error diff --git a/eval.c b/eval.c index 6201eb9..d7a9ce4 100644 --- a/eval.c +++ b/eval.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.42 2008/03/01 17:14:17 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.43 2008/03/01 21:10:25 tg Exp $"); #ifdef MKSH_SMALL #define MKSH_NOPWNAM @@ -166,7 +166,6 @@ expand(const char *cp, /* input word */ int newlines = 0; /* For trailing newlines in COMSUB */ int saw_eq, tilde_ok; int make_magic; - int sqmode = 0; /* keep backslashes before [\\/%#] */ size_t len; if (cp == NULL) @@ -339,11 +338,11 @@ expand(const char *cp, /* input word */ end[-2] = EOS; sp += end - beg - 1; } - evaluate(substitute(stg = wdstrip(beg), 0), + evaluate(substitute(stg = wdstrip(beg, false, false), 0), &from, KSH_UNWIND_ERROR, true); afree(stg, ATEMP); if (end) { - evaluate(stg = wdstrip(mid), + evaluate(stg = wdstrip(mid, false, false), &num, KSH_UNWIND_ERROR, true); afree(stg, ATEMP); } @@ -361,6 +360,134 @@ expand(const char *cp, /* input word */ x.str = str_nsave(beg, num, ATEMP); goto do_CSUBST; } + case '/': { + char *s, *p, *d, *sbeg, *end; + char *pat, *rrep; + char *tpat0, *tpat1, *tpat2; + + /* ! DOBLANK,DOBRACE_,DOTILDE */ + f = DOPAT | (f&DONTRUNCOMMAND) | + DOTEMP_; + quote = 0; + + s = wdcopy(sp, ATEMP); + p = s + (wdscan(sp, ADELIM) - sp); + d = s + (wdscan(sp, CSUBST) - sp); +#if 0 + fprintf(stderr, + "D: s=%p 〈%s〉\n" + " p=%p 〈%s〉\n" + " d=%p 〈%s〉\n", + s, wdstrip(s, true, false), + p, wdstrip(p, true, false), + d, wdstrip(d, true, false)); + fflush(stderr); +#endif + if (p >= d) + goto unwind_substsyn; + p[-2] = EOS; + if (p[-1] == /*{*/'}') + d = NULL; + else + d[-2] = EOS; + sp += (d ? d : p) - s - 1; + tpat0 = wdstrip(s, true, true); + pat = substitute(tpat0, 0); + rrep = d ? wdstrip(p, true, false) : null; + afree(s, ATEMP); + s = d = pat; + while (*s) + if (*s != '\\' || + s[1] == '%' || + s[1] == '#' || + s[1] == '\0' || + /* XXX really? */ s[1] == '\\' || + s[1] == '/') + *d++ = *s++; + else + s++; + *d = '\0'; +#if 0 + fprintf(stderr, + "D: 〔%s|%s〕→〔%s〕\n", + tpat0, pat, rrep); + fflush(stderr); +#endif + afree(tpat0, ATEMP); + + /* reject empty pattern */ + if (!*pat) + goto no_repl; + + /* prepare string on which to work */ + sbeg = s = str_save(str_val(st->var), ATEMP); + + /* first see if we have any match at all */ + tpat0 = pat; + if (*pat == '#') { + /* anchor at the beginning */ + tpat1 = shf_smprintf("%s%c*", ++tpat0, MAGIC); + tpat2 = tpat1; + } else if (*pat == '%') { + /* anchor at the end */ + tpat1 = shf_smprintf("%c*%s", MAGIC, ++tpat0); + tpat2 = tpat0; + } else { + /* float */ + tpat1 = shf_smprintf("%c*%s%c*", MAGIC, pat, MAGIC); + tpat2 = tpat1 + 2; + } + again_repl: +#if 0 + fprintf(stderr, + "D: 「%s」 ← 〔%s|%s〕\n", + s, tpat0, rrep); + fflush(stderr); +#endif + /* this would not be necessary if gmatchx would return + * the start and end values of a match found, like re* + */ + if (!gmatchx(sbeg, tpat1, false)) + goto end_repl; + end = strnul(s); + /* now anchor the beginning of the match */ + if (*pat != '#') + while (sbeg <= end) { + if (gmatchx(sbeg, tpat2, false)) + break; + else + sbeg++; + } + /* now anchor the end of the match */ + p = end; + if (*pat != '%') + while (p >= sbeg) { + bool gotmatch; + + c = *p; *p = '\0'; + gotmatch = gmatchx(sbeg, tpat0, false); + *p = c; + if (gotmatch) + break; + p--; + } + end = str_nsave(s, sbeg - s, ATEMP); + d = shf_smprintf("%s%s%s", end, rrep, p); + afree(end, ATEMP); + sbeg = d + (sbeg - s) + strlen(rrep); + afree(s, ATEMP); + s = d; + if (stype & 0x80) + goto again_repl; + end_repl: + afree(tpat1, ATEMP); + x.str = s; + no_repl: + afree(pat, ATEMP); + if (rrep != null) + afree(rrep, ATEMP); + goto do_CSUBST; + } case '#': case '%': /* ! DOBLANK,DOBRACE_,DOTILDE */ @@ -374,13 +501,6 @@ expand(const char *cp, /* input word */ *dp++ = MAGIC; *dp++ = (char)('@' | 0x80); break; - case '/': - /* ! DOBLANK,DOBRACE_,DOTILDE */ - f = DOPAT | (f&DONTRUNCOMMAND) | - DOTEMP_; - quote = 0; - sqmode = 2; - break; case '=': /* Enabling tilde expansion * after :s here is @@ -424,7 +544,6 @@ expand(const char *cp, /* input word */ tilde_ok = 0; /* in case of ${unset:-} */ *dp = '\0'; quote = st->quote; - sqmode = 0; f = st->f; if (f&DOBLANK) doblank--; @@ -432,10 +551,7 @@ expand(const char *cp, /* input word */ case '#': case '%': /* Append end-pattern */ - *dp++ = MAGIC; *dp++ = ')'; - /* FALLTHROUGH */ - case '/': - *dp = '\0'; + *dp++ = MAGIC; *dp++ = ')'; *dp = '\0'; dp = Xrestpos(ds, dp, st->base); /* Must use st->var since calling * global would break things @@ -486,6 +602,7 @@ expand(const char *cp, /* input word */ (debunk(s, s, strlen(s) + 1), s)); } case '0': + case '/': dp = Xrestpos(ds, dp, st->base); type = XSUB; if (f&DOBLANK) @@ -600,19 +717,6 @@ expand(const char *cp, /* input word */ break; } - if (sqmode) { - /* keep backslash before backslash or C_SUBOP2 char */ - if ((c == '\\') || - (quote && c == '/') || - (quote && sqmode == 2 && ctype(c, C_SUBOP2))) - *dp++ = '\\'; - if (sqmode == 2 && (quote || c != '/')) - /* beginning of string, ign. leading chars */ - sqmode = 1; - else if (!quote && c == '/') - sqmode = 0; - } - /* check for end of word or IFS separation */ if (c == 0 || (!quote && (f & DOBLANK) && doblank && !make_magic && ctype(c, C_IFS))) { @@ -824,7 +928,14 @@ varsub(Expand *xp, const char *sp, const char *word, stype = 0x80; c = word[slen + 0] == CHAR ? word[slen + 1] : 0; } - if (stype == 0x80 && (c == ' ' || c == '0')) { + if (!stype && c == '/') { + slen += 2; + stype = c; + if (word[slen] == ADELIM) { + slen += 2; + stype |= 0x80; + } + } else if (stype == 0x80 && (c == ' ' || c == '0')) { stype |= '0'; } else if (ctype(c, C_SUBOP1)) { slen += 2; @@ -848,7 +959,6 @@ varsub(Expand *xp, const char *sp, const char *word, switch (stype & 0x7f) { case '=': /* can't assign to a vector */ case '%': /* can't trim a vector (yet) */ - case '/': case '#': return -1; } @@ -871,7 +981,6 @@ varsub(Expand *xp, const char *sp, const char *word, case '=': /* can't assign to a vector */ case '%': /* can't trim a vector (yet) */ case '#': - case '/': case '?': return -1; } @@ -906,7 +1015,7 @@ varsub(Expand *xp, const char *sp, const char *word, c = stype&0x7f; /* test the compiler's code generator */ - if (ctype(c, C_SUBOP2) || stype == (0x80 | '0') || + if (ctype(c, C_SUBOP2) || stype == (0x80 | '0') || c == '/' || (((stype&0x80) ? *xp->str=='\0' : xp->str==null) ? /* undef? */ c == '=' || c == '-' || c == '?' : c == '+')) state = XBASE; /* expand word instead of variable value */ @@ -1011,99 +1120,6 @@ trimsub(char *str, char *pat, int how) return str_nsave(str, p - str, ATEMP); } break; - case '/': /* replace once - SLOW! */ - case '/'|0x80: /* replace all - SLOWER! */ - { - char *rpat, *rrep, *tpat1, *tpat2, *tpat0, *sbeg, *s, *d; - - /* separate search pattern and replacement string */ - s = d = rpat = str_save(pat, ATEMP); - rrep = null; - while ((c = *s++)) - if (c == '\\') { - if (s[0] == '\\' && s[1] != '/') - ++s; - if (!(*d++ = *s++)) - break; - } else if (c == '/') { - rrep = s; - break; - } else - *d++ = c; - *d++ = '\0'; - /* do not accept empty pattern */ - if (!*rpat) { - afree(rpat, ATEMP); - return (str); - } - - /* prepare string on which to work */ - sbeg = s = str; - - /* first see if we have any match at all */ - tpat0 = rpat; - d = pat; - if (*d == '\\') - ++d; - if (*d == '#') { - /* anchor at the beginning */ - tpat1 = shf_smprintf("%s%c*", ++tpat0, MAGIC); - tpat2 = tpat1; - } else if (*d == '%') { - /* anchor at the end */ - tpat1 = shf_smprintf("%c*%s", MAGIC, ++tpat0); - tpat2 = tpat0; - } else { - /* float */ - tpat1 = shf_smprintf("%c*%s%c*", MAGIC, rpat, MAGIC); - tpat2 = tpat1 + 2; - } -#if 0 - fprintf(stderr, "D: 「%s」 → 〔%s|%s〕\n", pat, tpat0, rrep); - fflush(stderr); -#endif - again_repl: - /* this would not be necessary if gmatchx would return - * the start and end values of a match found, like re* - */ - if (!gmatchx(sbeg, tpat1, false)) - goto end_repl; - /* now anchor the beginning of the match */ - if (*pat != '#') - while (sbeg <= end) { - if (gmatchx(sbeg, tpat2, false)) - break; - else - sbeg++; - } - /* now anchor the end of the match */ - p = end; - if (*pat != '%') - while (p >= sbeg) { - bool gotmatch; - - c = *p; *p = '\0'; - gotmatch = gmatchx(sbeg, tpat0, false); - *p = c; - if (gotmatch) - break; - p--; - } - end = str_nsave(s, sbeg - s, ATEMP); - d = shf_smprintf("%s%s%s", end, rrep, p); - afree(end, ATEMP); - sbeg = d + (sbeg - s) + strlen(rrep); - if (s != str) - afree(s, ATEMP); - s = d; - end = strnul(s); - if ((how & 0xFF) != '/') - goto again_repl; - end_repl: - afree(rpat, ATEMP); - afree(tpat1, ATEMP); - return (s); - } } return str; /* no match, return string */ diff --git a/lex.c b/lex.c index 800f9a9..0ef4410 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.53 2008/02/27 01:00:09 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.54 2008/03/01 21:10:25 tg Exp $"); /* * states while lexing word @@ -211,41 +211,12 @@ yylex(int cf) statep->ls_sadelim.nparen--; else if (statep->ls_sadelim.nparen == 0 && (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) { -#ifdef notyet - if (statep->ls_sadelim.style == SADELIM_MAKE && - statep->ls_sadelim.num == 1) { - if (c == /*{*/'}') - yyerror("%s: expected '%c' %s\n", - T_synerr, - statep->ls_sadelim.delimiter, - /*{*/ "before '}'"); - else { - *wp++ = ADELIM; - *wp++ = c; /* .delimiter */ - while ((c = getsc()) != /*{*/ '}') { - if (!c) { - yyerror("%s: expected '%c' %s\n", - T_synerr, - /*{*/ '}', "at end of input"); - } else if (strchr(sadelim_flags[statep->ls_sadelim.flags], c)) { - *wp++ = CHAR; - *wp++ = c; - } else { - char Ttmp[15] = "instead of ' '"; - - Ttmp[12] = c; - yyerror("%s: expected '%c' %s\n", - T_synerr, - /*{*/ '}', Ttmp); - } - } - } - } -#endif /* SADELIM_MAKE */ *wp++ = ADELIM; *wp++ = c; if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0) POP_STATE(); + if (c == /*{*/ '}') + POP_STATE(); break; } /* FALLTHROUGH */ @@ -369,6 +340,7 @@ yylex(int cf) *wp++ = '0'; *wp++ = ADELIM; *wp++ = ':'; + PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_sadelim.style = SADELIM_BASH; statep->ls_sadelim.delimiter = ':'; @@ -384,6 +356,7 @@ yylex(int cf) *wp++ = ' '; } ungetsc(c); + PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_sadelim.style = SADELIM_BASH; statep->ls_sadelim.delimiter = ':'; @@ -391,6 +364,20 @@ yylex(int cf) statep->ls_sadelim.nparen = 0; break; } + } else if (c == '/') { + *wp++ = CHAR, *wp++ = c; + if ((c = getsc()) == '/') { + *wp++ = ADELIM; + *wp++ = c; + } else + ungetsc(c); + PUSH_STATE(SBRACE); + PUSH_STATE(SADELIM); + statep->ls_sadelim.style = SADELIM_BASH; + statep->ls_sadelim.delimiter = '/'; + statep->ls_sadelim.num = 1; + statep->ls_sadelim.nparen = 0; + break; } /* If this is a trim operation, * treat (,|,) specially in STBRACE. diff --git a/sh.h b/sh.h index c097376..ce559ce 100644 --- a/sh.h +++ b/sh.h @@ -8,7 +8,7 @@ /* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */ -#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.191 2008/03/01 02:21:38 tg Rel $" +#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.192 2008/03/01 21:10:26 tg Exp $" #define MKSH_VERSION "R33 2008/03/01" #if HAVE_SYS_PARAM_H @@ -538,13 +538,12 @@ EXTERN int really_exit; #define C_SUBOP1 BIT(5) /* "=-+?" */ #define C_QUOTE BIT(6) /* \n\t"#$&'()*;<>?[]\`| (needing quoting) */ #define C_IFS BIT(7) /* $IFS */ -#define C_SUBOP2 BIT(8) /* "#%/" (magic, see below) */ +#define C_SUBOP2 BIT(8) /* "#%" (magic, see below) */ extern unsigned char chtypes[]; #define ctype(c, t) !!( ((t) == C_SUBOP2) ? \ - (((c) == '#' || (c) == '%' || \ - (c) == '/') ? 1 : 0) : \ + (((c) == '#' || (c) == '%') ? 1 : 0) : \ (chtypes[(unsigned char)(c)]&(t)) ) #define ksh_isalphx(c) ctype((c), C_ALPHA) #define ksh_isalnux(c) ctype((c), C_ALPHA | C_DIGIT) @@ -1451,7 +1450,7 @@ char *snptreef(char *, int, const char *, ...); struct op *tcopy(struct op *, Area *); char *wdcopy(const char *, Area *); const char *wdscan(const char *, int); -char *wdstrip(const char *); +char *wdstrip(const char *, bool, bool); void tfree(struct op *, Area *); /* var.c */ void newblock(void); diff --git a/syn.c b/syn.c index 5d3a31b..060946b 100644 --- a/syn.c +++ b/syn.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.18 2007/10/25 15:34:30 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.19 2008/03/01 21:10:26 tg Exp $"); struct nesting_state { int start_token; /* token than began nesting (eg, FOR) */ @@ -551,7 +551,7 @@ function_body(char *name, struct op *t; int old_func_parse; - sname = wdstrip(name); + sname = wdstrip(name, false, false); /* Check for valid characters in name. posix and ksh93 say only * allow [a-zA-Z_0-9] but this allows more as old pdkshs have * allowed more (the following were never allowed: diff --git a/tree.c b/tree.c index 45defea..9ab1868 100644 --- a/tree.c +++ b/tree.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.14 2008/02/26 21:08:33 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.15 2008/03/01 21:10:26 tg Exp $"); #define INDENT 4 @@ -528,7 +528,7 @@ wdscan(const char *wp, int c) * (string is allocated from ATEMP) */ char * -wdstrip(const char *wp) +wdstrip(const char *wp, bool keepq, bool make_magic) { struct shf shf; int c; @@ -538,7 +538,7 @@ wdstrip(const char *wp) /* problems: * `...` -> $(...) * x${foo:-"hi"} -> x${foo:-hi} - * x${foo:-'hi'} -> x${foo:-hi} + * x${foo:-'hi'} -> x${foo:-hi} unless keepq */ while (1) switch ((c = *wp++)) { @@ -546,8 +546,17 @@ wdstrip(const char *wp) return shf_sclose(&shf); /* null terminates */ case ADELIM: case CHAR: + c = *wp++; + if (make_magic && (ISMAGIC(c) || c == '[' || c == NOT || + c == '-' || c == ']' || c == '*' || c == '?')) + shf_putchar(MAGIC, &shf); + shf_putchar(c, &shf); + break; case QCHAR: - shf_putchar(*wp++, &shf); + c = *wp++; + if (keepq && (c == '"' || c == '`' || c == '$' || c == '\\')) + shf_putchar('\\', &shf); + shf_putchar(c, &shf); break; case COMSUB: shf_puts("$(", &shf); @@ -577,13 +586,22 @@ wdstrip(const char *wp) shf_putchar('}', &shf); break; case OPAT: - shf_putchar(*wp++, &shf); - shf_putchar('(', &shf); + if (make_magic) { + shf_putchar(MAGIC, &shf); + shf_putchar(*wp++ | 0x80, &shf); + } else { + shf_putchar(*wp++, &shf); + shf_putchar('(', &shf); + } break; case SPAT: + if (make_magic) + shf_putchar(MAGIC, &shf); shf_putchar('|', &shf); break; case CPAT: + if (make_magic) + shf_putchar(MAGIC, &shf); shf_putchar(')', &shf); break; }