the long-awaited recursive parser for COMSUB $(…) expressions

fixes RedHat BZ#496791
This commit is contained in:
tg
2011-03-06 01:25:35 +00:00
parent e93daee5f4
commit 25905b91a7
3 changed files with 246 additions and 209 deletions

324
lex.c
View File

@@ -1,7 +1,7 @@
/* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */ /* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */
/*- /*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
* Thorsten Glaser <tg@mirbsd.org> * Thorsten Glaser <tg@mirbsd.org>
* *
* Provided that these terms and disclaimer and all copyright notices * Provided that these terms and disclaimer and all copyright notices
@@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.122 2010/12/19 20:00:54 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.123 2011/03/06 01:25:33 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@@ -35,30 +35,24 @@ __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.122 2010/12/19 20:00:54 tg Exp $");
#define SEQUOTE 5 /* inside $'' */ #define SEQUOTE 5 /* inside $'' */
#define SBRACE 6 /* inside ${} */ #define SBRACE 6 /* inside ${} */
#define SQBRACE 7 /* inside "${}" */ #define SQBRACE 7 /* inside "${}" */
#define SCSPAREN 8 /* inside $() */ #define SBQUOTE 8 /* inside `` */
#define SBQUOTE 9 /* inside `` */ #define SASPAREN 9 /* inside $(( )) */
#define SASPAREN 10 /* inside $(( )) */ #define SHEREDELIM 10 /* parsing <<,<<- delimiter */
#define SHEREDELIM 11 /* parsing <<,<<- delimiter */ #define SHEREDQUOTE 11 /* parsing " in <<,<<- delimiter */
#define SHEREDQUOTE 12 /* parsing " in <<,<<- delimiter */ #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
#define SPATTERN 13 /* parsing *(...|...) pattern (*+?@!) */ #define STBRACE 13 /* parsing ${...[#%]...} */
#define STBRACE 14 /* parsing ${...[#%]...} */ #define SLETARRAY 14 /* inside =( ), just copy */
#define SLETARRAY 15 /* inside =( ), just copy */ #define SADELIM 15 /* like SBASE, looking for delimiter */
#define SADELIM 16 /* like SBASE, looking for delimiter */ #define SHERESTRING 16 /* parsing <<< string */
#define SHERESTRING 17 /* parsing <<< string */
/* Structure to keep track of the lexing state and the various pieces of info /*
* needed for each particular state. */ * Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state.
*/
typedef struct lex_state Lex_state; typedef struct lex_state Lex_state;
struct lex_state { struct lex_state {
int ls_state; int ls_state;
union { union {
/* $(...) */
struct scsparen_info {
int nparen; /* count open parenthesis */
int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
} u_scsparen;
/* $((...)) */ /* $((...)) */
struct sasparen_info { struct sasparen_info {
int nparen; /* count open parenthesis */ int nparen; /* count open parenthesis */
@@ -216,7 +210,8 @@ yylex(int cf)
state = SLETARRAY; state = SLETARRAY;
statep->ls_sletarray.nparen = 0; statep->ls_sletarray.nparen = 0;
#endif #endif
} else { /* normal lexing */ } else {
/* normal lexing */
state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
while ((c = getsc()) == ' ' || c == '\t') while ((c = getsc()) == ' ' || c == '\t')
; ;
@@ -228,7 +223,8 @@ yylex(int cf)
} }
ungetsc(c); ungetsc(c);
} }
if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ if (source->flags & SF_ALIAS) {
/* trailing ' ' in alias definition */
source->flags &= ~SF_ALIAS; source->flags &= ~SF_ALIAS;
cf |= ALIAS; cf |= ALIAS;
} }
@@ -275,7 +271,8 @@ yylex(int cf)
/* FALLTHROUGH */ /* FALLTHROUGH */
case SBASE: case SBASE:
if (c == '[' && (cf & (VARASN|ARRAYVAR))) { if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
*wp = EOS; /* temporary */ /* temporary */
*wp = EOS;
if (is_wdvarname(Xstring(ws, wp), false)) { if (is_wdvarname(Xstring(ws, wp), false)) {
char *p, *tmp; char *p, *tmp;
@@ -385,10 +382,13 @@ yylex(int cf)
*wp++ = EXPRSUB; *wp++ = EXPRSUB;
} else { } else {
ungetsc(c); ungetsc(c);
PUSH_STATE(SCSPAREN); subst_command:
statep->ls_scsparen.nparen = 1; sp = yyrecursive();
statep->ls_scsparen.csstate = 0; c2 = strlen(sp) + 1;
XcheckN(ws, wp, c2);
*wp++ = COMSUB; *wp++ = COMSUB;
memcpy(wp, sp, c2);
wp += c2;
} }
} else if (c == '{') /*}*/ { } else if (c == '{') /*}*/ {
*wp++ = OSUBST; *wp++ = OSUBST;
@@ -414,7 +414,8 @@ yylex(int cf)
break; break;
} else if (ksh_isdigit(c) || } else if (ksh_isdigit(c) ||
c == '('/*)*/ || c == ' ' || c == '('/*)*/ || c == ' ' ||
c == '$' /* XXX what else? */) { /*XXX what else? */
c == '$') {
/* substring subst. */ /* substring subst. */
if (c != ' ') { if (c != ' ') {
*wp++ = CHAR; *wp++ = CHAR;
@@ -445,7 +446,8 @@ yylex(int cf)
statep->ls_sadelim.nparen = 0; statep->ls_sadelim.nparen = 0;
break; break;
} }
/* If this is a trim operation, /*
* If this is a trim operation,
* treat (,|,) specially in STBRACE. * treat (,|,) specially in STBRACE.
*/ */
if (ctype(c, C_SUBOP2)) { if (ctype(c, C_SUBOP2)) {
@@ -499,7 +501,8 @@ yylex(int cf)
subst_gravis: subst_gravis:
PUSH_STATE(SBQUOTE); PUSH_STATE(SBQUOTE);
*wp++ = COMSUB; *wp++ = COMSUB;
/* Need to know if we are inside double quotes /*
* Need to know if we are inside double quotes
* since sh/AT&T-ksh translate the \" to " in * since sh/AT&T-ksh translate the \" to " in
* "`...\"...`". * "`...\"...`".
* This is not done in POSIX mode (section * This is not done in POSIX mode (section
@@ -600,92 +603,43 @@ yylex(int cf)
goto Subst; goto Subst;
break; break;
case SCSPAREN: /* $( ... ) */ /* $(( ... )) */
/* todo: deal with $(...) quoting properly case SASPAREN:
* kludge to partly fake quoting inside $(...): doesn't /*
* really work because nested $(...) or ${...} inside * XXX should nest using existing state machine
* double quotes aren't dealt with. * (embed "...", $(...), etc.)
*/ */
switch (statep->ls_scsparen.csstate) {
case 0: /* normal */
switch (c) {
case '(':
statep->ls_scsparen.nparen++;
break;
case ')':
statep->ls_scsparen.nparen--;
break;
case '\\':
statep->ls_scsparen.csstate = 1;
break;
case '"':
statep->ls_scsparen.csstate = 2;
break;
case '\'':
statep->ls_scsparen.csstate = 4;
ignore_backslash_newline++;
break;
}
break;
case 1: /* backslash in normal mode */
case 3: /* backslash in double quotes */
--statep->ls_scsparen.csstate;
break;
case 2: /* double quotes */
if (c == '"')
statep->ls_scsparen.csstate = 0;
else if (c == '\\')
statep->ls_scsparen.csstate = 3;
break;
case 4: /* single quotes */
if (c == '\'') {
statep->ls_scsparen.csstate = 0;
ignore_backslash_newline--;
}
break;
}
if (statep->ls_scsparen.nparen == 0) {
POP_STATE();
*wp++ = 0; /* end of COMSUB */
} else
*wp++ = c;
break;
case SASPAREN: /* $(( ... )) */
/* XXX should nest using existing state machine
* (embed "...", $(...), etc.) */
if (c == '(') if (c == '(')
statep->ls_sasparen.nparen++; statep->ls_sasparen.nparen++;
else if (c == ')') { else if (c == ')') {
statep->ls_sasparen.nparen--; statep->ls_sasparen.nparen--;
if (statep->ls_sasparen.nparen == 1) { if (statep->ls_sasparen.nparen == 1) {
/*(*/ POP_STATE();
if ((c2 = getsc()) == ')') { if ((c2 = getsc()) == /*(*/')') {
POP_STATE();
/* end of EXPRSUB */ /* end of EXPRSUB */
*wp++ = 0; *wp++ = 0;
break; break;
} else { } else {
char *s; Source *s;
ungetsc(c2); /*
/* mismatched parenthesis - * mismatched parenthesis -
* assume we were really * assume we were really
* parsing a $(...) expression * parsing a $(...) expression
*/ */
s = Xrestpos(ws, wp, *wp = EOS;
statep->ls_sasparen.start); wp = Xstring(ws, wp);
memmove(s + 1, s, wp - s); /* dp = $((blah))\0 */
*s++ = COMSUB; dp = wdstrip(wp, true, false);
*s = '('; /*)*/ s = pushs(SREREAD,
wp++; source->areap);
statep->ls_scsparen.nparen = 1; s->start = s->str =
statep->ls_scsparen.csstate = 0; (s->u.freeme = dp) + 2;
state = statep->ls_state = dp[strlen(dp) - 1] = c2;
SCSPAREN; /* s->str = (blah)C\0 */
s->next = source;
source = s;
goto subst_command;
} }
} }
} }
@@ -738,7 +692,8 @@ yylex(int cf)
*wp++ = SPAT; *wp++ = SPAT;
} else if (c == '(') { } else if (c == '(') {
*wp++ = OPAT; *wp++ = OPAT;
*wp++ = ' '; /* simile for @ */ /* simile for @ */
*wp++ = ' ';
PUSH_STATE(SPATTERN); PUSH_STATE(SPATTERN);
} else } else
goto Sbase1; goto Sbase1;
@@ -772,12 +727,13 @@ yylex(int cf)
*wp++ = c; *wp++ = c;
break; break;
case SWORD: /* ONEWORD */ /* ONEWORD */
case SWORD:
goto Subst; goto Subst;
case SLETPAREN: /* LETEXPR: (( ... )) */ /* LETEXPR: (( ... )) */
/*(*/ case SLETPAREN:
if (c == ')') { if (c == /*(*/ ')') {
if (statep->ls_sletparen.nparen > 0) if (statep->ls_sletparen.nparen > 0)
--statep->ls_sletparen.nparen; --statep->ls_sletparen.nparen;
else if ((c2 = getsc()) == /*(*/ ')') { else if ((c2 = getsc()) == /*(*/ ')') {
@@ -788,9 +744,10 @@ yylex(int cf)
Source *s; Source *s;
ungetsc(c2); ungetsc(c2);
/* mismatched parenthesis - /*
* mismatched parenthesis -
* assume we were really * assume we were really
* parsing a $(...) expression * parsing a (...) expression
*/ */
*wp = EOS; *wp = EOS;
sp = Xstring(ws, wp); sp = Xstring(ws, wp);
@@ -802,15 +759,17 @@ yylex(int cf)
return ('('/*)*/); return ('('/*)*/);
} }
} else if (c == '(') } else if (c == '(')
/* parenthesis inside quotes and backslashes /*
* are lost, but AT&T ksh doesn't count them * parenthesis inside quotes and
* either * backslashes are lost, but AT&T ksh
* doesn't count them either
*/ */
++statep->ls_sletparen.nparen; ++statep->ls_sletparen.nparen;
goto Sbase2; goto Sbase2;
#ifndef MKSH_SMALL #ifndef MKSH_SMALL
case SLETARRAY: /* LETARRAY: =( ... ) */ /* LETARRAY: =( ... ) */
case SLETARRAY:
if (c == '('/*)*/) if (c == '('/*)*/)
++statep->ls_sletarray.nparen; ++statep->ls_sletarray.nparen;
else if (c == /*(*/')') else if (c == /*(*/')')
@@ -823,7 +782,8 @@ yylex(int cf)
break; break;
#endif #endif
case SHERESTRING: /* <<< delimiter */ /* <<< delimiter */
case SHERESTRING:
if (c == '\\') { if (c == '\\') {
c = getsc(); c = getsc();
if (c) { if (c) {
@@ -861,13 +821,16 @@ yylex(int cf)
} }
break; break;
case SHEREDELIM: /* <<,<<- delimiter */ /* <<,<<- delimiter */
/* XXX chuck this state (and the next) - use case SHEREDELIM:
/*
* XXX chuck this state (and the next) - use
* the existing states ($ and \`...` should be * the existing states ($ and \`...` should be
* stripped of their specialness after the * stripped of their specialness after the
* fact). * fact).
*/ */
/* here delimiters need a special case since /*
* here delimiters need a special case since
* $ and `...` are not to be treated specially * $ and `...` are not to be treated specially
*/ */
if (c == '\\') { if (c == '\\') {
@@ -902,7 +865,8 @@ yylex(int cf)
} }
break; break;
case SHEREDQUOTE: /* " in <<,<<- delimiter */ /* " in <<,<<- delimiter */
case SHEREDQUOTE:
if (c == '"') { if (c == '"') {
*wp++ = CQUOTE; *wp++ = CQUOTE;
state = statep->ls_state = state = statep->ls_state =
@@ -929,15 +893,17 @@ yylex(int cf)
} }
break; break;
case SPATTERN: /* in *(...|...) pattern (*+?@!) */ /* in *(...|...) pattern (*+?@!) */
if ( /*(*/ c == ')') { case SPATTERN:
if (c == /*(*/ ')') {
*wp++ = CPAT; *wp++ = CPAT;
POP_STATE(); POP_STATE();
} else if (c == '|') { } else if (c == '|') {
*wp++ = SPAT; *wp++ = SPAT;
} else if (c == '(') { } else if (c == '(') {
*wp++ = OPAT; *wp++ = OPAT;
*wp++ = ' '; /* simile for @ */ /* simile for @ */
*wp++ = ' ';
PUSH_STATE(SPATTERN); PUSH_STATE(SPATTERN);
} else } else
goto Sbase1; goto Sbase1;
@@ -1010,7 +976,8 @@ yylex(int cf)
iop->name = NULL; iop->name = NULL;
iop->delim = NULL; iop->delim = NULL;
iop->heredoc = NULL; iop->heredoc = NULL;
Xfree(ws, wp); /* free word */ /* free word */
Xfree(ws, wp);
yylval.iop = iop; yylval.iop = iop;
return (REDIR); return (REDIR);
no_iop: no_iop:
@@ -1018,7 +985,8 @@ yylex(int cf)
} }
if (wp == dp && state == SBASE) { if (wp == dp && state == SBASE) {
Xfree(ws, wp); /* free word */ /* free word */
Xfree(ws, wp);
/* no word, process LEX1 character */ /* no word, process LEX1 character */
if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
if ((c2 = getsc()) == c) if ((c2 = getsc()) == c)
@@ -1040,7 +1008,8 @@ yylex(int cf)
return (c); return (c);
} }
*wp++ = EOS; /* terminate word */ /* terminate word */
*wp++ = EOS;
yylval.cp = Xclose(ws, wp); yylval.cp = Xclose(ws, wp);
if (state == SWORD || state == SLETPAREN if (state == SWORD || state == SLETPAREN
/* XXX ONEWORD? */ /* XXX ONEWORD? */
@@ -1075,15 +1044,16 @@ yylex(int cf)
/* Make sure the ident array stays '\0' padded */ /* Make sure the ident array stays '\0' padded */
memset(dp, 0, (ident+IDENT) - dp + 1); memset(dp, 0, (ident+IDENT) - dp + 1);
if (c != EOS) if (c != EOS)
*ident = '\0'; /* word is not unquoted */ /* word is not unquoted */
*ident = '\0';
if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
struct tbl *p; struct tbl *p;
uint32_t h = hash(ident); uint32_t h = hash(ident);
/* { */
if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
(!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) { (!(cf & ESACONLY) || p->val.i == ESAC ||
p->val.i == /*{*/ '}')) {
afree(yylval.cp, ATEMP); afree(yylval.cp, ATEMP);
return (p->val.i); return (p->val.i);
} }
@@ -1205,7 +1175,8 @@ readhere(struct ioword *iop)
Xput(xs, xp, c); Xput(xs, xp, c);
eofp++; eofp++;
} }
/* Allow EOF here so commands with out trailing newlines /*
* Allow EOF here so commands with out trailing newlines
* will work (eg, ksh -c '...', $(...), etc). * will work (eg, ksh -c '...', $(...), etc).
*/ */
if (*eofp == '\0' && (c == 0 || c == '\n')) { if (*eofp == '\0' && (c == 0 || c == '\n')) {
@@ -1238,7 +1209,8 @@ yyerror(const char *fmt, ...)
/* pop aliases and re-reads */ /* pop aliases and re-reads */
while (source->type == SALIAS || source->type == SREREAD) while (source->type == SALIAS || source->type == SREREAD)
source = source->next; source = source->next;
source->str = null; /* zap pending input */ /* zap pending input */
source->str = null;
error_prefix(true); error_prefix(true);
va_start(va, fmt); va_start(va, fmt);
@@ -1274,7 +1246,8 @@ getsc__(void)
getsc_again: getsc_again:
while ((c = *s->str++) == 0) { while ((c = *s->str++) == 0) {
s->str = NULL; /* return 0 for EOF by default */ /* return 0 for EOF by default */
s->str = NULL;
switch (s->type) { switch (s->type) {
case SEOF: case SEOF:
s->str = null; s->str = null;
@@ -1314,23 +1287,26 @@ getsc__(void)
s = source; s = source;
} else if (*s->u.tblp->val.s && } else if (*s->u.tblp->val.s &&
(c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
source = s = s->next; /* pop source stack */ /* pop source stack */
/* Note that this alias ended with a space, source = s = s->next;
* enabling alias expansion on the following /*
* word. * Note that this alias ended with a
* space, enabling alias expansion on
* the following word.
*/ */
s->flags |= SF_ALIAS; s->flags |= SF_ALIAS;
} else { } else {
/* At this point, we need to keep the current /*
* At this point, we need to keep the current
* alias in the source list so recursive * alias in the source list so recursive
* aliases can be detected and we also need * aliases can be detected and we also need to
* to return the next character. Do this * return the next character. Do this by
* by temporarily popping the alias to get * temporarily popping the alias to get the
* the next character and then put it back * next character and then put it back in the
* in the source list with the SF_ALIASEND * source list with the SF_ALIASEND flag set.
* flag set.
*/ */
source = s->next; /* pop source stack */ /* pop source stack */
source = s->next;
source->flags |= s->flags & SF_ALIAS; source->flags |= s->flags & SF_ALIAS;
c = getsc__(); c = getsc__();
if (c) { if (c) {
@@ -1341,7 +1317,7 @@ getsc__(void)
source = s; source = s;
} else { } else {
s = source; s = source;
/* avoid reading eof twice */ /* avoid reading EOF twice */
s->str = NULL; s->str = NULL;
break; break;
} }
@@ -1349,7 +1325,8 @@ getsc__(void)
continue; continue;
case SREREAD: case SREREAD:
if (s->start != s->ugbuf) /* yuck */ if (s->start != s->ugbuf)
/* yuck */
afree(s->u.freeme, ATEMP); afree(s->u.freeme, ATEMP);
source = s = s->next; source = s = s->next;
continue; continue;
@@ -1404,7 +1381,8 @@ getsc_line(Source *s)
int nread; int nread;
nread = x_read(xp, LINE); nread = x_read(xp, LINE);
if (nread < 0) /* read error */ if (nread < 0)
/* read error */
nread = 0; nread = 0;
xp[nread] = '\0'; xp[nread] = '\0';
xp += nread; xp += nread;
@@ -1427,20 +1405,24 @@ getsc_line(Source *s)
if (!p || (xp = p, xp[-1] == '\n')) if (!p || (xp = p, xp[-1] == '\n'))
break; break;
/* double buffer size */ /* double buffer size */
xp++; /* move past NUL so doubling works... */ /* move past NUL so doubling works... */
xp++;
XcheckN(s->xs, xp, Xlength(s->xs, xp)); XcheckN(s->xs, xp, Xlength(s->xs, xp));
xp--; /* ...and move back again */ /* ...and move back again */
xp--;
} }
/* flush any unwanted input so other programs/builtins /*
* flush any unwanted input so other programs/builtins
* can read it. Not very optimal, but less error prone * can read it. Not very optimal, but less error prone
* than flushing else where, dealing with redirections, * than flushing else where, dealing with redirections,
* etc. * etc.
* todo: reduce size of shf buffer (~128?) if SSTDIN * TODO: reduce size of shf buffer (~128?) if SSTDIN
*/ */
if (s->type == SSTDIN) if (s->type == SSTDIN)
shf_flush(s->u.shf); shf_flush(s->u.shf);
} }
/* XXX: temporary kludge to restore source after a /*
* XXX: temporary kludge to restore source after a
* trap may have been executed. * trap may have been executed.
*/ */
source = s; source = s;
@@ -1498,8 +1480,10 @@ set_prompt(int to, Source *s)
cur_prompt = to; cur_prompt = to;
switch (to) { switch (to) {
case PS1: /* command */ /* command */
/* Substitute ! and !! here, before substitutions are done case PS1:
/*
* Substitute ! and !! here, before substitutions are done
* so ! in expanded variables are not expanded. * so ! in expanded variables are not expanded.
* NOTE: this is not what AT&T ksh does (it does it after * NOTE: this is not what AT&T ksh does (it does it after
* substitutions, POSIX doesn't say which is to be done. * substitutions, POSIX doesn't say which is to be done.
@@ -1523,7 +1507,8 @@ set_prompt(int to, Source *s)
newenv(E_ERRH); newenv(E_ERRH);
if (sigsetjmp(e->jbuf, 0)) { if (sigsetjmp(e->jbuf, 0)) {
prompt = safe_prompt; prompt = safe_prompt;
/* Don't print an error - assume it has already /*
* Don't print an error - assume it has already
* been printed. Reason is we may have forked * been printed. Reason is we may have forked
* to run a command and the child may be * to run a command and the child may be
* unwinding its stack through this code as it * unwinding its stack through this code as it
@@ -1536,7 +1521,8 @@ set_prompt(int to, Source *s)
quitenv(NULL); quitenv(NULL);
} }
break; break;
case PS2: /* command continuation */ /* command continuation */
case PS2:
prompt = str_val(global("PS2")); prompt = str_val(global("PS2"));
break; break;
} }
@@ -1548,11 +1534,12 @@ dopprompt(const char *cp, int ntruncate, bool doprint)
int columns = 0, lines = 0, indelimit = 0; int columns = 0, lines = 0, indelimit = 0;
char delimiter = 0; char delimiter = 0;
/* Undocumented AT&T ksh feature: /*
* If the second char in the prompt string is \r then the first char * Undocumented AT&T ksh feature:
* is taken to be a non-printing delimiter and any chars between two * If the second char in the prompt string is \r then the first
* instances of the delimiter are not considered to be part of the * char is taken to be a non-printing delimiter and any chars
* prompt length * between two instances of the delimiter are not considered to
* be part of the prompt length
*/ */
if (*cp && cp[1] == '\r') { if (*cp && cp[1] == '\r') {
delimiter = *cp; delimiter = *cp;
@@ -1603,8 +1590,9 @@ promptlen(const char *cp)
return (dopprompt(cp, 0, false)); return (dopprompt(cp, 0, false));
} }
/* Read the variable part of a ${...} expression (ie, up to but not including /*
* the :[-+?=#%] or close-brace. * Read the variable part of a ${...} expression (i.e. up to but not
* including the :[-+?=#%] or close-brace).
*/ */
static char * static char *
get_brace_var(XString *wsp, char *wp) get_brace_var(XString *wsp, char *wp)
@@ -1649,7 +1637,8 @@ get_brace_var(XString *wsp, char *wp)
*wp++ = *p++; *wp++ = *p++;
} }
afree(tmp, ATEMP); afree(tmp, ATEMP);
c = getsc(); /* the ] */ /* the ] */
c = getsc();
} }
goto out; goto out;
} }
@@ -1665,7 +1654,8 @@ get_brace_var(XString *wsp, char *wp)
*wp++ = c; *wp++ = c;
} }
out: out:
*wp++ = '\0'; /* end of variable part */ /* end of variable part */
*wp++ = '\0';
ungetsc(c); ungetsc(c);
return (wp); return (wp);
} }
@@ -1679,9 +1669,9 @@ static int
arraysub(char **strp) arraysub(char **strp)
{ {
XString ws; XString ws;
char *wp; char *wp, c;
char c; /* we are just past the initial [ */
int depth = 1; /* we are just past the initial [ */ int depth = 1;
Xinit(ws, wp, 32, ATEMP); Xinit(ws, wp, 32, ATEMP);
@@ -1707,7 +1697,7 @@ ungetsc(int c)
{ {
if (backslash_skip) if (backslash_skip)
backslash_skip--; backslash_skip--;
/* Don't unget eof... */ /* Don't unget EOF... */
if (source->str == null && c == '\0') if (source->str == null && c == '\0')
return (source->str); return (source->str);
if (source->str > source->start) if (source->str > source->start)

3
sh.h
View File

@@ -154,7 +154,7 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.435 2011/03/05 21:48:09 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.436 2011/03/06 01:25:33 tg Exp $");
#endif #endif
#define MKSH_VERSION "R39 2011/02/18" #define MKSH_VERSION "R39 2011/02/18"
@@ -1737,6 +1737,7 @@ int shf_vfprintf(struct shf *, const char *, va_list)
void initkeywords(void); void initkeywords(void);
struct op *compile(Source *); struct op *compile(Source *);
bool parse_usec(const char *, struct timeval *); bool parse_usec(const char *, struct timeval *);
char *yyrecursive(void);
/* tree.c */ /* tree.c */
int fptreef(struct shf *, int, const char *, ...); int fptreef(struct shf *, int, const char *, ...);
char *snptreef(char *, int, const char *, ...); char *snptreef(char *, int, const char *, ...);

128
syn.c
View File

@@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.53 2011/02/11 00:41:38 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.54 2011/03/06 01:25:35 tg Exp $");
struct nesting_state { struct nesting_state {
int start_token; /* token than began nesting (eg, FOR) */ int start_token; /* token than began nesting (eg, FOR) */
@@ -32,7 +32,7 @@ struct nesting_state {
static void yyparse(void); static void yyparse(void);
static struct op *pipeline(int); static struct op *pipeline(int);
static struct op *andor(void); static struct op *andor(void);
static struct op *c_list(int); static struct op *c_list(bool);
static struct ioword *synio(int); static struct ioword *synio(int);
static struct op *nested(int, int, int); static struct op *nested(int, int, int);
static struct op *get_command(int); static struct op *get_command(int);
@@ -59,11 +59,11 @@ static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
static struct op *outtree; /* yyparse output */ static struct op *outtree; /* yyparse output */
static struct nesting_state nesting; /* \n changed to ; */ static struct nesting_state nesting; /* \n changed to ; */
static int reject; /* token(cf) gets symbol again */ static bool reject; /* token(cf) gets symbol again */
static int symbol; /* yylex value */ static int symbol; /* yylex value */
#define REJECT (reject = 1) #define REJECT (reject = true)
#define ACCEPT (reject = 0) #define ACCEPT (reject = false)
#define token(cf) ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf))) #define token(cf) ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
#define tpeek(cf) ((reject) ? (symbol) : (REJECT, symbol = yylex(cf))) #define tpeek(cf) ((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
#define musthave(c,cf) do { if (token(cf) != (c)) syntaxerr(NULL); } while (0) #define musthave(c,cf) do { if (token(cf) != (c)) syntaxerr(NULL); } while (0)
@@ -122,20 +122,23 @@ andor(void)
} }
static struct op * static struct op *
c_list(int multi) c_list(bool multi)
{ {
struct op *t = NULL, *p, *tl = NULL; struct op *t = NULL, *p, *tl = NULL;
int c, have_sep; int c;
bool have_sep;
while (1) { while (1) {
p = andor(); p = andor();
/* Token has always been read/rejected at this point, so /*
* Token has always been read/rejected at this point, so
* we don't worry about what flags to pass token() * we don't worry about what flags to pass token()
*/ */
c = token(0); c = token(0);
have_sep = 1; have_sep = true;
if (c == '\n' && (multi || inalias(source))) { if (c == '\n' && (multi || inalias(source))) {
if (!p) /* ignore blank lines */ if (!p)
/* ignore blank lines */
continue; continue;
} else if (!p) } else if (!p)
break; break;
@@ -143,7 +146,7 @@ c_list(int multi)
p = block(c == '&' ? TASYNC : TCOPROC, p = block(c == '&' ? TASYNC : TCOPROC,
p, NOBLOCK, NOWORDS); p, NOBLOCK, NOWORDS);
else if (c != ';') else if (c != ';')
have_sep = 0; have_sep = false;
if (!t) if (!t)
t = p; t = p;
else if (!tl) else if (!tl)
@@ -178,7 +181,8 @@ synio(int cf)
musthave(LWORD, ishere ? HEREDELIM : 0); musthave(LWORD, ishere ? HEREDELIM : 0);
if (ishere) { if (ishere) {
iop->delim = yylval.cp; iop->delim = yylval.cp;
if (*ident != 0) /* unquoted */ if (*ident != 0)
/* unquoted */
iop->flag |= IOEVAL; iop->flag |= IOEVAL;
if (herep > &heres[HERES - 1]) if (herep > &heres[HERES - 1])
yyerror("too many %ss\n", "<<"); yyerror("too many %ss\n", "<<");
@@ -243,7 +247,8 @@ get_command(int cf)
afree(iops, ATEMP); afree(iops, ATEMP);
XPfree(args); XPfree(args);
XPfree(vars); XPfree(vars);
return (NULL); /* empty line */ /* empty line */
return (NULL);
case LWORD: case LWORD:
case REDIR: case REDIR:
@@ -266,7 +271,8 @@ get_command(int cf)
case LWORD: case LWORD:
ACCEPT; ACCEPT;
/* the iopn == 0 and XPsize(vars) == 0 are /*
* the iopn == 0 and XPsize(vars) == 0 are
* dubious but AT&T ksh acts this way * dubious but AT&T ksh acts this way
*/ */
if (iopn == 0 && XPsize(vars) == 0 && if (iopn == 0 && XPsize(vars) == 0 &&
@@ -281,7 +287,8 @@ get_command(int cf)
break; break;
case '(': case '(':
/* Check for "> foo (echo hi)" which AT&T ksh /*
* Check for "> foo (echo hi)" which AT&T ksh
* allows (not POSIX, but not disallowed) * allows (not POSIX, but not disallowed)
*/ */
afree(t, ATEMP); afree(t, ATEMP);
@@ -294,13 +301,12 @@ get_command(int cf)
XPsize(vars) == 1 && is_wdvarassign(yylval.cp)) XPsize(vars) == 1 && is_wdvarassign(yylval.cp))
goto is_wdarrassign; goto is_wdarrassign;
#endif #endif
/* Must be a function */ /* must be a function */
if (iopn != 0 || XPsize(args) != 1 || if (iopn != 0 || XPsize(args) != 1 ||
XPsize(vars) != 0) XPsize(vars) != 0)
syntaxerr(NULL); syntaxerr(NULL);
ACCEPT; ACCEPT;
/*(*/ musthave(/*(*/')', 0);
musthave(')', 0);
t = function_body(XPptrv(args)[0], false); t = function_body(XPptrv(args)[0], false);
goto Leave; goto Leave;
#ifndef MKSH_SMALL #ifndef MKSH_SMALL
@@ -364,13 +370,13 @@ get_command(int cf)
CHAR, 't', EOS CHAR, 't', EOS
}; };
/* Leave KEYWORD in syniocf (allow if (( 1 )) then ...) */ /* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
lno = source->line; lno = source->line;
ACCEPT; ACCEPT;
switch (token(LETEXPR)) { switch (token(LETEXPR)) {
case LWORD: case LWORD:
break; break;
case '(': /* ) */ case '(': /*)*/
goto Subshell; goto Subshell;
default: default:
syntaxerr(NULL); syntaxerr(NULL);
@@ -383,7 +389,7 @@ get_command(int cf)
} }
case DBRACKET: /* [[ .. ]] */ case DBRACKET: /* [[ .. ]] */
/* Leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */ /* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
t = newtp(TDBRACKET); t = newtp(TDBRACKET);
ACCEPT; ACCEPT;
{ {
@@ -454,7 +460,8 @@ get_command(int cf)
t = pipeline(0); t = pipeline(0);
if (t) { if (t) {
t->str = alloc(2, ATEMP); t->str = alloc(2, ATEMP);
t->str[0] = '\0'; /* TF_* flags */ /* TF_* flags */
t->str[0] = '\0';
t->str[1] = '\0'; t->str[1] = '\0';
} }
t = block(TTIME, t, NOBLOCK, NOWORDS); t = block(TTIME, t, NOBLOCK, NOWORDS);
@@ -501,7 +508,8 @@ dogroup(void)
struct op *list; struct op *list;
c = token(CONTIN|KEYWORD|ALIAS); c = token(CONTIN|KEYWORD|ALIAS);
/* A {...} can be used instead of do...done for for/select loops /*
* A {...} can be used instead of do...done for for/select loops
* but not for while/until loops - we don't need to check if it * but not for while/until loops - we don't need to check if it
* is a while loop because it would have been parsed as part of * is a while loop because it would have been parsed as part of
* the conditional command list... * the conditional command list...
@@ -569,7 +577,8 @@ caselist(void)
else else
syntaxerr(NULL); syntaxerr(NULL);
t = tl = NULL; t = tl = NULL;
while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) { /* no ALIAS here */ /* no ALIAS here */
while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
struct op *tc = casepart(c); struct op *tc = casepart(c);
if (tl == NULL) if (tl == NULL)
t = tl = tc, tl->right = NULL; t = tl = tc, tl->right = NULL;
@@ -609,16 +618,18 @@ casepart(int endtok)
static struct op * static struct op *
function_body(char *name, function_body(char *name,
bool ksh_func) /* function foo { ... } vs foo() { .. } */ /* function foo { ... } vs foo() { .. } */
bool ksh_func)
{ {
char *sname, *p; char *sname, *p;
struct op *t; struct op *t;
bool old_func_parse; bool old_func_parse;
sname = wdstrip(name, false, false); sname = wdstrip(name, false, false);
/* Check for valid characters in name. POSIX and AT&T ksh93 say only /*-
* allow [a-zA-Z_0-9] but this allows more as old pdkshs have * Check for valid characters in name. POSIX and AT&T ksh93 say
* allowed more (the following were never allowed: * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
* have allowed more; the following were never allowed:
* NUL TAB NL SP " $ & ' ( ) ; < = > \ ` | * NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
* C_QUOTE covers all but adds # * ? [ ] * C_QUOTE covers all but adds # * ? [ ]
*/ */
@@ -626,13 +637,14 @@ function_body(char *name,
if (ctype(*p, C_QUOTE)) if (ctype(*p, C_QUOTE))
yyerror("%s: %s\n", sname, "invalid function name"); yyerror("%s: %s\n", sname, "invalid function name");
/* Note that POSIX allows only compound statements after foo(), sh and /*
* AT&T ksh allow any command, go with the later since it shouldn't * Note that POSIX allows only compound statements after foo(),
* break anything. However, for function foo, AT&T ksh only accepts * sh and AT&T ksh allow any command, go with the later since it
* an open-brace. * shouldn't break anything. However, for function foo, AT&T ksh
* only accepts an open-brace.
*/ */
if (ksh_func) { if (ksh_func) {
if (tpeek(CONTIN|KEYWORD|ALIAS) == '(' /* ) */) { if (tpeek(CONTIN|KEYWORD|ALIAS) == '(' /*)*/) {
struct tbl *tp; struct tbl *tp;
/* function foo () { */ /* function foo () { */
@@ -643,7 +655,7 @@ function_body(char *name,
if ((tp = ktsearch(&aliases, sname, hash(sname)))) if ((tp = ktsearch(&aliases, sname, hash(sname))))
ktdelete(tp); ktdelete(tp);
} }
musthave('{', CONTIN|KEYWORD|ALIAS); /* } */ musthave('{' /*}*/, CONTIN|KEYWORD|ALIAS);
REJECT; REJECT;
} }
@@ -689,7 +701,8 @@ wordlist(void)
XPinit(args, 16); XPinit(args, 16);
/* POSIX does not do alias expansion here... */ /* POSIX does not do alias expansion here... */
if ((c = token(CONTIN|KEYWORD|ALIAS)) != IN) { if ((c = token(CONTIN|KEYWORD|ALIAS)) != IN) {
if (c != ';') /* non-POSIX, but AT&T ksh accepts a ; here */ if (c != ';')
/* non-POSIX, but AT&T ksh accepts a ; here */
REJECT; REJECT;
return (NULL); return (NULL);
} }
@@ -766,7 +779,8 @@ initkeywords(void)
struct tbl *p; struct tbl *p;
ktinit(&keywords, APERM, ktinit(&keywords, APERM,
/* must be 80% of 2^n (currently 20 keywords) */ 32); /* must be 80% of 2^n (currently 20 keywords) */
32);
for (tt = tokentab; tt->name; tt++) { for (tt = tokentab; tt->name; tt++) {
if (tt->reserved) { if (tt->reserved) {
p = ktenter(&keywords, tt->name, hash(tt->name)); p = ktenter(&keywords, tt->name, hash(tt->name));
@@ -780,7 +794,8 @@ initkeywords(void)
static void static void
syntaxerr(const char *what) syntaxerr(const char *what)
{ {
char redir[6]; /* 2<<- is the longest redirection, I think */ /* 2<<- is the longest redirection, I think */
char redir[6];
const char *s; const char *s;
struct tokeninfo const *tt; struct tokeninfo const *tt;
int c; int c;
@@ -870,7 +885,8 @@ compile(Source *s)
return (outtree); return (outtree);
} }
/* This kludge exists to take care of sh/AT&T ksh oddity in which /*-
* This kludge exists to take care of sh/AT&T ksh oddity in which
* the arguments of alias/export/readonly/typeset have no field * the arguments of alias/export/readonly/typeset have no field
* splitting, file globbing, or (normal) tilde expansion done. * splitting, file globbing, or (normal) tilde expansion done.
* AT&T ksh seems to do something similar to this since * AT&T ksh seems to do something similar to this since
@@ -902,7 +918,8 @@ inalias(struct source *s)
} }
/* Order important - indexed by Test_meta values /*
* Order important - indexed by Test_meta values
* Note that ||, &&, ( and ) can't appear in as unquoted strings * Note that ||, &&, ( and ) can't appear in as unquoted strings
* in normal shell input, so these can be interpreted unambiguously * in normal shell input, so these can be interpreted unambiguously
* in the evaluation pass. * in the evaluation pass.
@@ -955,7 +972,8 @@ dbtestp_isa(Test_env *te, Test_meta meta)
db_lthan : db_gthan, ATEMP); db_lthan : db_gthan, ATEMP);
} else if (uqword && (ret = test_isop(meta, ident))) } else if (uqword && (ret = test_isop(meta, ident)))
save = yylval.cp; save = yylval.cp;
} else /* meta == TM_END */ } else
/* meta == TM_END */
ret = (uqword && !strcmp(yylval.cp, ret = (uqword && !strcmp(yylval.cp,
db_close)) ? TO_NONNULL : TO_NONOP; db_close)) ? TO_NONNULL : TO_NONOP;
if (ret != TO_NONOP) { if (ret != TO_NONOP) {
@@ -1063,3 +1081,31 @@ parse_usec(const char *s, struct timeval *tv)
return (false); return (false);
} }
#endif #endif
/*
* Helper function called from within lex.c:yylex() to parse
* a COMSUB recursively using the main shell parser and lexer
*/
char *
yyrecursive(void)
{
struct op *t;
char *cp;
bool old_reject;
int old_symbol;
/* push reject state, parse recursively, pop reject state */
old_reject = reject;
old_symbol = symbol;
ACCEPT;
/* we use TPAREN as a helper container here */
t = nested(TPAREN, '(', ')');
reject = old_reject;
symbol = old_symbol;
/* t->left because nested(TPAREN, ...) hides our goodies there */
cp = snptreef(NULL, 0, "%T", t->left);
tfree(t, ATEMP);
return (cp);
}