diff --git a/check.t b/check.t index c2641e1..6200066 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.747 2016/08/01 21:29:05 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.748 2016/08/01 21:37:59 tg Exp $ # -*- mode: sh -*- #- # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, @@ -30,7 +30,7 @@ # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date expected-stdout: - @(#)MIRBSD KSH R53 2016/07/28 + @(#)MIRBSD KSH R53 2016/08/01 description: Check version of shell. stdin: @@ -39,7 +39,7 @@ name: KSH_VERSION category: shell:legacy-no --- expected-stdout: - @(#)LEGACY KSH R53 2016/07/28 + @(#)LEGACY KSH R53 2016/08/01 description: Check version of legacy shell. stdin: @@ -1791,6 +1791,41 @@ stdin: expected-stdout: 1 . --- +name: expand-slashes-1 +description: + Check that side effects in substring replacement are handled correctly +stdin: + foo=n1n1n1n2n3 + i=2 + n=1 + echo 1 ${foo//n$((n++))/[$((++i))]} . + echo 2 $n , $i . +expected-stdout: + 1 [3][3][3]n2n3 . + 2 2 , 3 . +--- +name: expand-slashes-2 +description: + Check that side effects in substring replacement are handled correctly +stdin: + foo=n1n1n1n2n3 + i=2 + n=1 + echo 1 ${foo@/n$((n++))/[$((++i))]} . + echo 2 $n , $i . +expected-stdout: + 1 [3]n1n1[4][5] . + 2 5 , 5 . +--- +name: expand-slashes-3 +description: + Check that we can access the replaced string +stdin: + foo=n1n1n1n2n3 + echo 1 ${foo@/n[12]/[$KSH_MATCH]} . +expected-stdout: + 1 [n1][n1][n1][n2]n3 . 
+--- name: eglob-bad-1 description: Check that globbing isn't done when glob has syntax error diff --git a/eval.c b/eval.c index 2cac31d..ab1cc88 100644 --- a/eval.c +++ b/eval.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.191 2016/07/25 00:04:41 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.192 2016/08/01 21:38:01 tg Exp $"); /* * string expansion @@ -186,7 +186,7 @@ evalonestr(const char *cp, int f) rv = (char *) *XPptrv(w); break; default: - rv = evalstr(cp, f&~DOGLOB); + rv = evalstr(cp, f & ~DOGLOB); break; } XPfree(w); @@ -478,12 +478,14 @@ expand( strndupx(x.str, beg, num, ATEMP); goto do_CSUBST; } + case 0x100 | '/': case '/': { char *s, *p, *d, *sbeg, *end; - char *pat, *rrep; + char *pat = NULL, *rrep = null; char fpat = 0, *tpat1, *tpat2; + char *ws, *wpat, *wrep; - s = wdcopy(sp, ATEMP); + s = ws = wdcopy(sp, ATEMP); p = s + (wdscan(sp, ADELIM) - sp); d = s + (wdscan(sp, CSUBST) - sp); p[-2] = EOS; @@ -492,16 +494,24 @@ expand( else d[-2] = EOS; sp += (d ? d : p) - s - 1; - if (!(stype & 0x80) && + if (!(stype & 0x180) && s[0] == CHAR && (s[1] == '#' || s[1] == '%')) fpat = s[1]; - pat = evalstr(s + (fpat ? 2 : 0), - DOTILDE | DOSCALAR | DOPAT); - rrep = d ? evalstr(p, - DOTILDE | DOSCALAR) : null; - afree(s, ATEMP); + wpat = s + (fpat ? 2 : 0); + wrep = d ? p : NULL; + if (!(stype & 0x100)) { + rrep = wrep ? 
evalstr(wrep, + DOTILDE | DOSCALAR) : + null; + } + /* prepare string on which to work */ + strdupx(s, str_val(st->var), ATEMP); + sbeg = s; + again_search: + pat = evalstr(wpat, + DOTILDE | DOSCALAR | DOPAT); /* check for special cases */ if (!*pat && !fpat) { /* @@ -510,19 +520,15 @@ expand( */ goto no_repl; } - if ((stype & 0x80) && + if ((stype & 0x180) && gmatchx(null, pat, false)) { /* * pattern matches empty * string => don't loop */ - stype &= ~0x80; + stype &= ~0x180; } - /* prepare string on which to work */ - strdupx(s, str_val(st->var), ATEMP); - sbeg = s; - /* first see if we have any match at all */ if (fpat == '#') { /* anchor at the beginning */ @@ -567,13 +573,27 @@ expand( break; p--; } + strndupx(end, sbeg, p - sbeg, ATEMP); + record_match(end); + afree(end, ATEMP); + if (stype & 0x100) { + if (rrep != null) + afree(rrep, ATEMP); + rrep = wrep ? evalstr(wrep, + DOTILDE | DOSCALAR) : + null; + } strndupx(end, s, sbeg - s, ATEMP); d = shf_smprintf(Tf_sss, end, rrep, p); afree(end, ATEMP); sbeg = d + (sbeg - s) + strlen(rrep); afree(s, ATEMP); s = d; - if (stype & 0x80) + if (stype & 0x100) { + afree(tpat1, ATEMP); + afree(pat, ATEMP); + goto again_search; + } else if (stype & 0x80) goto again_repl; end_repl: afree(tpat1, ATEMP); @@ -582,6 +602,7 @@ expand( afree(pat, ATEMP); if (rrep != null) afree(rrep, ATEMP); + afree(ws, ATEMP); goto do_CSUBST; } case '#': @@ -733,6 +754,7 @@ expand( debunk(dp, dp, strlen(dp) + 1)); break; case '0': + case 0x100 | '/': case '/': case 0x100 | '#': case 0x100 | 'Q': @@ -1207,6 +1229,7 @@ varsub(Expand *xp, const char *sp, const char *word, case '#': case '?': case '0': + case 0x100 | '/': case '/': case 0x100 | '#': case 0x100 | 'Q': @@ -1237,6 +1260,7 @@ varsub(Expand *xp, const char *sp, const char *word, case '#': case '?': case '0': + case 0x100 | '/': case '/': case 0x100 | '#': case 0x100 | 'Q': @@ -1277,13 +1301,13 @@ varsub(Expand *xp, const char *sp, const char *word, c = stype & 0x7F; /* test the 
compiler's code generator */ - if (((stype < 0x100) && (ctype(c, C_SUBOP2) || c == '/' || + if (((stype < 0x100) && (ctype(c, C_SUBOP2) || (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) && (state != XARG || (ifs0 || xp->split ? (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ? c == '=' || c == '-' || c == '?' : c == '+'))) || stype == (0x80 | '0') || stype == (0x100 | '#') || - stype == (0x100 | 'Q')) + stype == (0x100 | 'Q') || (stype & 0x7F) == '/') /* expand word instead of variable value */ state = XBASE; if (Flag(FNOUNSET) && xp->str == null && !zero_ok && @@ -1420,6 +1444,7 @@ trimsub(char *str, char *pat, int how) for (p = str; p <= end; p += utf_ptradj(p)) { c = *p; *p = '\0'; if (gmatchx(str, pat, false)) { + record_match(str); *p = c; return (p); } @@ -1431,6 +1456,7 @@ trimsub(char *str, char *pat, int how) for (p = end; p >= str; p--) { c = *p; *p = '\0'; if (gmatchx(str, pat, false)) { + record_match(str); *p = c; return (p); } @@ -1458,6 +1484,7 @@ trimsub(char *str, char *pat, int how) for (p = str; p <= end; p++) if (gmatchx(p, pat, false)) { trimsub_match: + record_match(p); strndupx(end, str, p - str, ATEMP); return (end); } diff --git a/exec.c b/exec.c index 36a7acd..e3149cd 100644 --- a/exec.c +++ b/exec.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.178 2016/07/25 00:04:41 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.179 2016/08/01 21:38:02 tg Exp $"); #ifndef MKSH_DEFAULT_EXECSHELL #define MKSH_DEFAULT_EXECSHELL MKSH_UNIXROOT "/bin/sh" @@ -390,6 +390,7 @@ execute(struct op * volatile t, for (ap = (const char **)t->vars; *ap; ap++) { if (i || ((s = evalstr(*ap, DOTILDE|DOPAT)) && gmatchx(ccp, s, false))) { + record_match(ccp); rv = execute(t->left, flags & XERROK, xerrok); i = 0; diff --git a/funcs.c b/funcs.c index a847d90..e052fac 100644 --- a/funcs.c +++ b/funcs.c @@ -38,7 +38,7 @@ #endif #endif -__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.304 2016/08/01 14:23:24 tg Exp $"); 
+__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.305 2016/08/01 21:38:02 tg Exp $"); #if HAVE_KILLPG /* @@ -3202,14 +3202,20 @@ test_eval(Test_env *te, Test_op op, const char *opnd1, const char *opnd2, /* = */ case TO_STEQL: - if (te->flags & TEF_DBRACKET) - return (gmatchx(opnd1, opnd2, false)); + if (te->flags & TEF_DBRACKET) { + if ((i = gmatchx(opnd1, opnd2, false))) + record_match(opnd1); + return (i); + } return (strcmp(opnd1, opnd2) == 0); /* != */ case TO_STNEQ: - if (te->flags & TEF_DBRACKET) - return (!gmatchx(opnd1, opnd2, false)); + if (te->flags & TEF_DBRACKET) { + if ((i = gmatchx(opnd1, opnd2, false))) + record_match(opnd1); + return (!i); + } return (strcmp(opnd1, opnd2) != 0); /* < */ diff --git a/lex.c b/lex.c index e959583..741bb93 100644 --- a/lex.c +++ b/lex.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.227 2016/07/25 21:05:21 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.228 2016/08/01 21:38:03 tg Exp $"); /* * states while lexing word @@ -462,10 +462,12 @@ yylex(int cf) break; } } else if (c == '/') { + c2 = ADELIM; + parse_adelim_slash: *wp++ = CHAR; *wp++ = c; if ((c = getsc()) == '/') { - *wp++ = ADELIM; + *wp++ = c2; *wp++ = c; } else ungetsc(c); @@ -475,6 +477,13 @@ yylex(int cf) statep->ls_adelim.num = 1; statep->nparen = 0; break; + } else if (c == '@') { + c2 = getsc(); + ungetsc(c2); + if (c2 == '/') { + c2 = CHAR; + goto parse_adelim_slash; + } } /* * If this is a trim operation, diff --git a/mksh.1 b/mksh.1 index dae6e1c..5ba4307 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,4 +1,4 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.408 2016/08/01 19:40:00 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.409 2016/08/01 21:38:04 tg Exp $ .\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $ .\"- .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, @@ -1682,6 +1682,25 @@ Inefficiently implemented, may be slow. 
.Pp .Sm off .It Xo +.Pf ${ Ar name +.Pf @/ Ar pattern / Ar string No } +.Xc +.Sm on +The same as +.Sm off +.Xo +.Pf ${ Ar name +.Pf // Ar pattern / Ar string No } , +.Xc +.Sm on +except that both +.Ar pattern +and +.Ar string +are expanded anew for each iteration. +.Pp +.Sm off +.It Xo .Pf ${ Ar name : Ns Ar pos .Pf : Ns Ar len Ns } .Xc @@ -1933,6 +1952,54 @@ The effective group id of the shell. The real group id of the shell. .It Ev KSHUID The real user id of the shell. +.It Ev KSH_MATCH +The last matched string. +In a future version, this will be an indexed array, +with indexes 1 and up capturing matching groups. +Set by string comparisons (== and !=) in double-bracket test +expressions when a match is found (when != returns false), by +.Ic case +when a match is encountered, and by the substitution operations +.Sm off +.Xo +.Pf ${ Ar x +.Pf # Ar pat No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf ## Ar pat No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf % Ar pat No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf %% Ar pat No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf / Ar pat / Ar rpl No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf /# Ar pat / Ar rpl No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf /% Ar pat / Ar rpl No } , +.Xc +.Xo +.Pf ${ Ar x +.Pf // Ar pat / Ar rpl No } , +.Xc +and +.Xo +.Pf ${ Ar x +.Pf @/ Ar pat / Ar rpl No } . +.Xc +.Sm on +See the end of the Emacs editing mode documentation for an example. .It Ev KSH_VERSION The name and version of the shell (read-only). See also the version commands in @@ -5794,6 +5861,11 @@ Immediately after a .Ic yank , replaces the inserted text string with the next previously killed text string. .El +.Pp +The tab completion escapes characters the same way as the following code: +.Bd -literal +print \-nr \-\- "${x@/[\e"\-\e$&\-*:\-?[\e\e\e\`{\-\e}${IFS\-$\*(aq \et\en\*(aq}]/\e\e$KSH_MATCH}" +.Ed .Ss Vi editing mode .Em Note: The vi command-line editing mode is orphaned, yet still functional. 
diff --git a/sh.h b/sh.h index 51ac29a..1940357 100644 --- a/sh.h +++ b/sh.h @@ -175,9 +175,9 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.782 2016/08/01 20:23:15 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.783 2016/08/01 21:38:05 tg Exp $"); #endif -#define MKSH_VERSION "R53 2016/07/28" +#define MKSH_VERSION "R53 2016/08/01" /* arithmetic types: C implementation */ #if !HAVE_CAN_INTTYPES @@ -2319,6 +2319,7 @@ uint32_t chvt_rndsetup(const void *, size_t) MKSH_A_PURE; mksh_ari_t rndget(void); void rndset(unsigned long); void rndpush(const void *); +void record_match(const char *); enum Test_op { /* non-operator */ diff --git a/var.c b/var.c index 5dda74f..bcde2f4 100644 --- a/var.c +++ b/var.c @@ -28,7 +28,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.206 2016/07/25 21:02:13 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.207 2016/08/01 21:38:07 tg Exp $"); /*- * Variables @@ -1739,3 +1739,15 @@ rndpush(const void *s) BAFHUpdateOctet_reg(h, 0); qh_state = h; } + +/* record last glob match: store istr (the string that matched) in the KSH_MATCH parameter, discarding its previous value; returns nothing */ +void +record_match(const char *istr) +{ + struct tbl *vp; + + vp = local("KSH_MATCH", false); /* look up the KSH_MATCH entry; NOTE(review): meaning of the 'false' argument is not visible here, confirm against local() */ + unset(vp, 1); /* drop whatever an earlier match left behind */ + vp->flag = DEFINED | RDONLY; /* flags are overwritten wholesale, not OR-ed in: the entry ends up defined and read-only */ + setstr(vp, istr, 0x4); /* presumably 0x4 lets this store proceed even though RDONLY was just set; TODO confirm against setstr() */ +}