/* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */ /*- * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009 * Thorsten Glaser * * Provided that these terms and disclaimer and all copyright notices * are retained or reproduced in an accompanying document, permission * is granted to deal in this work without restriction, including un- * limited rights to use, publicly perform, distribute, sell, modify, * merge, give away, or sublicence. * * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to * the utmost extent permitted by applicable law, neither express nor * implied; without malicious intent or gross negligence. In no event * may a licensor, author or contributor be held liable for indirect, * direct, other damage, loss, or other issues arising in any way out * of dealing in the work, even if advised of the possibility of such * damage or existence of a defect, except proven that it results out * of said person's immediate fault when using the work as intended. */ #include "sh.h" __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.100 2009/10/04 12:45:22 tg Exp $"); /* * states while lexing word */ #define SBASE 0 /* outside any lexical constructs */ #define SWORD 1 /* implicit quoting for substitute() */ #define SLETPAREN 2 /* inside (( )), implicit quoting */ #define SSQUOTE 3 /* inside '' */ #define SDQUOTE 4 /* inside "" */ #define SBRACE 5 /* inside ${} */ #define SCSPAREN 6 /* inside $() */ #define SBQUOTE 7 /* inside `` */ #define SASPAREN 8 /* inside $(( )) */ #define SHEREDELIM 9 /* parsing <<,<<- delimiter */ #define SHEREDQUOTE 10 /* parsing " in <<,<<- delimiter */ #define SPATTERN 11 /* parsing *(...|...) pattern (*+?@!) */ #define STBRACE 12 /* parsing ${...[#%]...} */ #define SLETARRAY 13 /* inside =( ), just copy */ #define SADELIM 14 /* like SBASE, looking for delimiter */ #define SHERESTRING 15 /* parsing <<< string */ #define SEQUOTE 16 /* inside $'' */ /* Structure to keep track of the lexing state and the various pieces of info * needed for each particular state. */ typedef struct lex_state Lex_state; struct lex_state { int ls_state; union { /* $(...) */ struct scsparen_info { int nparen; /* count open parenthesis */ int csstate; /* XXX remove */ #define ls_scsparen ls_info.u_scsparen } u_scsparen; /* $((...)) */ struct sasparen_info { int nparen; /* count open parenthesis */ int start; /* marks start of $(( in output str */ #define ls_sasparen ls_info.u_sasparen } u_sasparen; /* ((...)) */ struct sletparen_info { int nparen; /* count open parenthesis */ #define ls_sletparen ls_info.u_sletparen } u_sletparen; /* `...` */ struct sbquote_info { int indquotes; /* true if in double quotes: "`...`" */ #define ls_sbquote ls_info.u_sbquote } u_sbquote; #ifndef MKSH_SMALL /* =(...) */ struct sletarray_info { int nparen; /* count open parentheses */ #define ls_sletarray ls_info.u_sletarray } u_sletarray; #endif /* ADELIM */ struct sadelim_info { unsigned char nparen; /* count open parentheses */ #define SADELIM_BASH 0 #define SADELIM_MAKE 1 unsigned char style; unsigned char delimiter; unsigned char num; unsigned char flags; /* ofs. into sadelim_flags[] */ #define ls_sadelim ls_info.u_sadelim } u_sadelim; /* $'...' */ struct sequote_info { bool got_NUL; /* ignore rest of string */ #define ls_sequote ls_info.u_sequote } u_sequote; Lex_state *base; /* used to point to next state block */ } ls_info; }; typedef struct { Lex_state *base; Lex_state *end; } State_info; static void readhere(struct ioword *); static int getsc__(void); static void getsc_line(Source *); static int getsc_bn(void); static int s_get(void); static void s_put(int); static char *get_brace_var(XString *, char *); static int arraysub(char **); static const char *ungetsc(int); static void gethere(bool); static Lex_state *push_state_(State_info *, Lex_state *); static Lex_state *pop_state_(State_info *, Lex_state *); static int dopprompt(const char *, int, bool); static int backslash_skip; static int ignore_backslash_newline; /* optimised getsc_bn() */ #define _getsc() (*source->str != '\0' && *source->str != '\\' \ && !backslash_skip && !(source->flags & SF_FIRST) \ ? *source->str++ : getsc_bn()) /* optimised getsc__() */ #define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \ ? *source->str++ : getsc__()) #ifdef MKSH_SMALL static int getsc(void); static int getsc_(void); static int getsc(void) { return (_getsc()); } static int getsc_(void) { return (_getsc_()); } #else /* !MKSH_SMALL: use them inline */ #define getsc() _getsc() #define getsc_() _getsc_() #endif #define STATE_BSIZE 32 #define PUSH_STATE(s) do { \ if (++statep == state_info.end) \ statep = push_state_(&state_info, statep); \ state = statep->ls_state = (s); \ } while (0) #define POP_STATE() do { \ if (--statep == state_info.base) \ statep = pop_state_(&state_info, statep); \ state = statep->ls_state; \ } while (0) /** * Lexical analyser * * tokens are not regular expressions, they are LL(1). * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". * hence the state stack. */ int yylex(int cf) { Lex_state states[STATE_BSIZE], *statep, *s2, *base; State_info state_info; int c, c2, state; XString ws; /* expandable output word */ char *wp; /* output word pointer */ char *sp, *dp; Again: states[0].ls_state = -1; states[0].ls_info.base = NULL; statep = &states[1]; state_info.base = states; state_info.end = &state_info.base[STATE_BSIZE]; Xinit(ws, wp, 64, ATEMP); backslash_skip = 0; ignore_backslash_newline = 0; if (cf&ONEWORD) state = SWORD; else if (cf&LETEXPR) { *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ state = SLETPAREN; statep->ls_sletparen.nparen = 0; #ifndef MKSH_SMALL } else if (cf&LETARRAY) { state = SLETARRAY; statep->ls_sletarray.nparen = 0; #endif } else { /* normal lexing */ state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; while ((c = getsc()) == ' ' || c == '\t') ; if (c == '#') { ignore_backslash_newline++; while ((c = getsc()) != '\0' && c != '\n') ; ignore_backslash_newline--; } ungetsc(c); } if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ source->flags &= ~SF_ALIAS; cf |= ALIAS; } /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ statep->ls_state = state; /* check for here string */ if (state == SHEREDELIM) { c = getsc(); if (c == '<') { state = SHERESTRING; while ((c = getsc()) == ' ' || c == '\t') ; ungetsc(c); c = '<'; goto accept_nonword; } ungetsc(c); } /* collect non-special or quoted characters to form word */ while (!((c = getsc()) == 0 || ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) && ctype(c, C_LEX1)))) { accept_nonword: Xcheck(ws, wp); switch (state) { case SADELIM: if (c == '(') statep->ls_sadelim.nparen++; else if (c == ')') statep->ls_sadelim.nparen--; else if (statep->ls_sadelim.nparen == 0 && (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) { *wp++ = ADELIM; *wp++ = c; if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0) POP_STATE(); if (c == /*{*/ '}') POP_STATE(); break; } /* FALLTHROUGH */ case SBASE: if (c == '[' && (cf & (VARASN|ARRAYVAR))) { *wp = EOS; /* temporary */ if (is_wdvarname(Xstring(ws, wp), false)) { char *p, *tmp; if (arraysub(&tmp)) { *wp++ = CHAR; *wp++ = c; for (p = tmp; *p; ) { Xcheck(ws, wp); *wp++ = CHAR; *wp++ = *p++; } afree(tmp, ATEMP); break; } else { Source *s; s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = tmp; s->next = source; source = s; } } *wp++ = CHAR; *wp++ = c; break; } /* FALLTHRU */ Sbase1: /* includes *(...|...) pattern (*+?@!) */ if (c == '*' || c == '@' || c == '+' || c == '?' || c == '!') { c2 = getsc(); if (c2 == '(' /*)*/ ) { *wp++ = OPAT; *wp++ = c; PUSH_STATE(SPATTERN); break; } ungetsc(c2); } /* FALLTHRU */ Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ switch (c) { case '\\': c = getsc(); if (c) /* trailing \ is lost */ *wp++ = QCHAR, *wp++ = c; break; case '\'': *wp++ = OQUOTE; ignore_backslash_newline++; PUSH_STATE(SSQUOTE); break; case '"': *wp++ = OQUOTE; PUSH_STATE(SDQUOTE); break; default: goto Subst; } break; Subst: switch (c) { case '\\': c = getsc(); switch (c) { case '"': if ((cf & HEREDOC)) goto heredocquote; /* FALLTHROUGH */ case '\\': case '$': case '`': *wp++ = QCHAR, *wp++ = c; break; default: heredocquote: Xcheck(ws, wp); if (c) { /* trailing \ is lost */ *wp++ = CHAR, *wp++ = '\\'; *wp++ = CHAR, *wp++ = c; } break; } break; case '$': c = getsc(); if (c == '(') /*)*/ { c = getsc(); if (c == '(') /*)*/ { PUSH_STATE(SASPAREN); statep->ls_sasparen.nparen = 2; statep->ls_sasparen.start = Xsavepos(ws, wp); *wp++ = EXPRSUB; } else { ungetsc(c); PUSH_STATE(SCSPAREN); statep->ls_scsparen.nparen = 1; statep->ls_scsparen.csstate = 0; *wp++ = COMSUB; } } else if (c == '{') /*}*/ { *wp++ = OSUBST; *wp++ = '{'; /*}*/ wp = get_brace_var(&ws, wp); c = getsc(); /* allow :# and :% (ksh88 compat) */ if (c == ':') { *wp++ = CHAR, *wp++ = c; c = getsc(); if (c == ':') { *wp++ = CHAR; *wp++ = '0'; *wp++ = ADELIM; *wp++ = ':'; PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_sadelim.style = SADELIM_BASH; statep->ls_sadelim.delimiter = ':'; statep->ls_sadelim.num = 1; statep->ls_sadelim.nparen = 0; break; } else if (ksh_isdigit(c) || c == '('/*)*/ || c == ' ' || c == '$' /* XXX what else? */) { /* substring subst. */ if (c != ' ') { *wp++ = CHAR; *wp++ = ' '; } ungetsc(c); PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_sadelim.style = SADELIM_BASH; statep->ls_sadelim.delimiter = ':'; statep->ls_sadelim.num = 2; statep->ls_sadelim.nparen = 0; break; } } else if (c == '/') { *wp++ = CHAR, *wp++ = c; if ((c = getsc()) == '/') { *wp++ = ADELIM; *wp++ = c; } else ungetsc(c); PUSH_STATE(SBRACE); PUSH_STATE(SADELIM); statep->ls_sadelim.style = SADELIM_BASH; statep->ls_sadelim.delimiter = '/'; statep->ls_sadelim.num = 1; statep->ls_sadelim.nparen = 0; break; } /* If this is a trim operation, * treat (,|,) specially in STBRACE. */ if (ctype(c, C_SUBOP2)) { ungetsc(c); PUSH_STATE(STBRACE); } else { ungetsc(c); PUSH_STATE(SBRACE); } } else if (ksh_isalphx(c)) { *wp++ = OSUBST; *wp++ = 'X'; do { Xcheck(ws, wp); *wp++ = c; c = getsc(); } while (ksh_isalnux(c)); *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; ungetsc(c); } else if (ctype(c, C_VAR1 | C_DIGIT)) { Xcheck(ws, wp); *wp++ = OSUBST; *wp++ = 'X'; *wp++ = c; *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; } else if (c == '\'' && (state == SBASE)) { /* XXX which other states are valid? */ *wp++ = OQUOTE; ignore_backslash_newline++; PUSH_STATE(SEQUOTE); statep->ls_sequote.got_NUL = false; break; } else { *wp++ = CHAR, *wp++ = '$'; ungetsc(c); } break; case '`': PUSH_STATE(SBQUOTE); *wp++ = COMSUB; /* Need to know if we are inside double quotes * since sh/AT&T-ksh translate the \" to " in * "`...\"...`". * This is not done in POSIX mode (section * 3.2.3, Double Quotes: "The backquote shall * retain its special meaning introducing the * other form of command substitution (see * 3.6.3). The portion of the quoted string * from the initial backquote and the * characters up to the next backquote that * is not preceded by a backslash (having * escape characters removed) defines that * command whose output replaces `...` when * the word is expanded." * Section 3.6.3, Command Substitution: * "Within the backquoted style of command * substitution, backslash shall retain its * literal meaning, except when followed by * $ ` \."). */ statep->ls_sbquote.indquotes = 0; s2 = statep; base = state_info.base; while (1) { for (; s2 != base; s2--) { if (s2->ls_state == SDQUOTE) { statep->ls_sbquote.indquotes = 1; break; } } if (s2 != base) break; if (!(s2 = s2->ls_info.base)) break; base = s2-- - STATE_BSIZE; } break; case QCHAR: if (cf & LQCHAR) { *wp++ = QCHAR; *wp++ = getsc(); break; } /* FALLTHRU */ default: *wp++ = CHAR, *wp++ = c; } break; case SEQUOTE: if (c == '\'') { POP_STATE(); *wp++ = CQUOTE; ignore_backslash_newline--; } else if (c == '\\') { if ((c2 = unbksl(true, s_get, s_put)) == -1) c2 = s_get(); if (c2 == 0) statep->ls_sequote.got_NUL = true; if (!statep->ls_sequote.got_NUL) { char ts[4]; if ((unsigned int)c2 < 0x100) *wp++ = QCHAR, *wp++ = c2; else { c = utf_wctomb(ts, c2 - 0x100); ts[c] = 0; for (c = 0; ts[c]; ++c) *wp++ = QCHAR, \ *wp++ = ts[c]; } } } else if (!statep->ls_sequote.got_NUL) *wp++ = QCHAR, *wp++ = c; break; case SSQUOTE: if (c == '\'') { POP_STATE(); *wp++ = CQUOTE; ignore_backslash_newline--; } else *wp++ = QCHAR, *wp++ = c; break; case SDQUOTE: if (c == '"') { POP_STATE(); *wp++ = CQUOTE; } else goto Subst; break; case SCSPAREN: /* $( ... ) */ /* todo: deal with $(...) quoting properly * kludge to partly fake quoting inside $(...): doesn't * really work because nested $(...) or ${...} inside * double quotes aren't dealt with. */ switch (statep->ls_scsparen.csstate) { case 0: /* normal */ switch (c) { case '(': statep->ls_scsparen.nparen++; break; case ')': statep->ls_scsparen.nparen--; break; case '\\': statep->ls_scsparen.csstate = 1; break; case '"': statep->ls_scsparen.csstate = 2; break; case '\'': statep->ls_scsparen.csstate = 4; ignore_backslash_newline++; break; } break; case 1: /* backslash in normal mode */ case 3: /* backslash in double quotes */ --statep->ls_scsparen.csstate; break; case 2: /* double quotes */ if (c == '"') statep->ls_scsparen.csstate = 0; else if (c == '\\') statep->ls_scsparen.csstate = 3; break; case 4: /* single quotes */ if (c == '\'') { statep->ls_scsparen.csstate = 0; ignore_backslash_newline--; } break; } if (statep->ls_scsparen.nparen == 0) { POP_STATE(); *wp++ = 0; /* end of COMSUB */ } else *wp++ = c; break; case SASPAREN: /* $(( ... )) */ /* XXX should nest using existing state machine * (embed "...", $(...), etc.) */ if (c == '(') statep->ls_sasparen.nparen++; else if (c == ')') { statep->ls_sasparen.nparen--; if (statep->ls_sasparen.nparen == 1) { /*(*/ if ((c2 = getsc()) == ')') { POP_STATE(); *wp++ = 0; /* end of EXPRSUB */ break; } else { char *s; ungetsc(c2); /* mismatched parenthesis - * assume we were really * parsing a $(...) expression */ s = Xrestpos(ws, wp, statep->ls_sasparen.start); memmove(s + 1, s, wp - s); *s++ = COMSUB; *s = '('; /*)*/ wp++; statep->ls_scsparen.nparen = 1; statep->ls_scsparen.csstate = 0; state = statep->ls_state = SCSPAREN; } } } *wp++ = c; break; case SBRACE: /*{*/ if (c == '}') { POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; } else goto Sbase1; break; case STBRACE: /* Same as SBRACE, except (,|,) treated specially */ /*{*/ if (c == '}') { POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; } else if (c == '|') { *wp++ = SPAT; } else if (c == '(') { *wp++ = OPAT; *wp++ = ' '; /* simile for @ */ PUSH_STATE(SPATTERN); } else goto Sbase1; break; case SBQUOTE: if (c == '`') { *wp++ = 0; POP_STATE(); } else if (c == '\\') { switch (c = getsc()) { case '\\': case '$': case '`': *wp++ = c; break; case '"': if (statep->ls_sbquote.indquotes) { *wp++ = c; break; } /* FALLTHRU */ default: if (c) { /* trailing \ is lost */ *wp++ = '\\'; *wp++ = c; } break; } } else *wp++ = c; break; case SWORD: /* ONEWORD */ goto Subst; case SLETPAREN: /* LETEXPR: (( ... )) */ /*(*/ if (c == ')') { if (statep->ls_sletparen.nparen > 0) --statep->ls_sletparen.nparen; else if ((c2 = getsc()) == /*(*/ ')') { c = 0; *wp++ = CQUOTE; goto Done; } else { Source *s; ungetsc(c2); /* mismatched parenthesis - * assume we were really * parsing a $(...) expression */ *wp = EOS; sp = Xstring(ws, wp); dp = wdstrip(sp, true, false); s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = dp; s->next = source; source = s; return ('('/*)*/); } } else if (c == '(') /* parenthesis inside quotes and backslashes * are lost, but AT&T ksh doesn't count them * either */ ++statep->ls_sletparen.nparen; goto Sbase2; #ifndef MKSH_SMALL case SLETARRAY: /* LETARRAY: =( ... ) */ if (c == '('/*)*/) ++statep->ls_sletarray.nparen; else if (c == /*(*/')') if (statep->ls_sletarray.nparen-- == 0) { c = 0; goto Done; } *wp++ = CHAR, *wp++ = c; break; #endif case SHERESTRING: /* <<< delimiter */ if (c == '\\') { c = getsc(); if (c) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } /* invoke quoting mode */ Xstring(ws, wp)[0] = QCHAR; } else if (c == '$') { if ((c2 = getsc()) == '\'') { PUSH_STATE(SEQUOTE); statep->ls_sequote.got_NUL = false; goto sherestring_quoted; } ungetsc(c2); goto sherestring_regular; } else if (c == '\'') { PUSH_STATE(SSQUOTE); sherestring_quoted: *wp++ = OQUOTE; ignore_backslash_newline++; /* invoke quoting mode */ Xstring(ws, wp)[0] = QCHAR; } else if (c == '"') { state = statep->ls_state = SHEREDQUOTE; *wp++ = OQUOTE; /* just don't IFS split; no quoting mode */ } else { sherestring_regular: *wp++ = CHAR; *wp++ = c; } break; case SHEREDELIM: /* <<,<<- delimiter */ /* XXX chuck this state (and the next) - use * the existing states ($ and \`...` should be * stripped of their specialness after the * fact). */ /* here delimiters need a special case since * $ and `...` are not to be treated specially */ if (c == '\\') { c = getsc(); if (c) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } } else if (c == '$') { if ((c2 = getsc()) == '\'') { PUSH_STATE(SEQUOTE); statep->ls_sequote.got_NUL = false; goto sheredelim_quoted; } ungetsc(c2); goto sheredelim_regular; } else if (c == '\'') { PUSH_STATE(SSQUOTE); sheredelim_quoted: *wp++ = OQUOTE; ignore_backslash_newline++; } else if (c == '"') { state = statep->ls_state = SHEREDQUOTE; *wp++ = OQUOTE; } else { sheredelim_regular: *wp++ = CHAR; *wp++ = c; } break; case SHEREDQUOTE: /* " in <<,<<- delimiter */ if (c == '"') { *wp++ = CQUOTE; state = statep->ls_state = /* dp[1] == '<' means here string */ Xstring(ws, wp)[1] == '<' ? SHERESTRING : SHEREDELIM; } else { if (c == '\\') { switch (c = getsc()) { case '\\': case '"': case '$': case '`': break; default: if (c) { /* trailing \ lost */ *wp++ = CHAR; *wp++ = '\\'; } break; } } *wp++ = CHAR; *wp++ = c; } break; case SPATTERN: /* in *(...|...) pattern (*+?@!) */ if ( /*(*/ c == ')') { *wp++ = CPAT; POP_STATE(); } else if (c == '|') { *wp++ = SPAT; } else if (c == '(') { *wp++ = OPAT; *wp++ = ' '; /* simile for @ */ PUSH_STATE(SPATTERN); } else goto Sbase1; break; } } Done: Xcheck(ws, wp); if (statep != &states[1]) /* XXX figure out what is missing */ yyerror("no closing quote\n"); #ifndef MKSH_SMALL if (state == SLETARRAY && statep->ls_sletarray.nparen != -1) yyerror("%s: ')' missing\n", T_synerr); #endif /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ if (state == SHEREDELIM || state == SHERESTRING) state = SBASE; dp = Xstring(ws, wp); if ((c == '<' || c == '>' || c == '&') && state == SBASE) { struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); if (Xlength(ws, wp) == 0) iop->unit = c == '<' ? 0 : 1; else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { if (dp[c2] != CHAR) goto no_iop; if (!ksh_isdigit(dp[c2 + 1])) goto no_iop; iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; } if (iop->unit >= FDBASE) goto no_iop; if (c == '&') { if ((c2 = getsc()) != '>') { ungetsc(c2); goto no_iop; } c = c2; iop->flag = IOBASH; } else iop->flag = 0; c2 = getsc(); /* <<, >>, <> are ok, >< is not */ if (c == c2 || (c == '<' && c2 == '>')) { iop->flag |= c == c2 ? (c == '>' ? IOCAT : IOHERE) : IORDWR; if (iop->flag == IOHERE) { if ((c2 = getsc()) == '-') iop->flag |= IOSKIP; else ungetsc(c2); } } else if (c2 == '&') iop->flag |= IODUP | (c == '<' ? IORDUP : 0); else { iop->flag |= c == '>' ? IOWRITE : IOREAD; if (c == '>' && c2 == '|') iop->flag |= IOCLOB; else ungetsc(c2); } iop->name = NULL; iop->delim = NULL; iop->heredoc = NULL; Xfree(ws, wp); /* free word */ yylval.iop = iop; return (REDIR); no_iop: ; } if (wp == dp && state == SBASE) { Xfree(ws, wp); /* free word */ /* no word, process LEX1 character */ if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { if ((c2 = getsc()) == c) c = (c == ';') ? BREAK : (c == '|') ? LOGOR : (c == '&') ? LOGAND : /* c == '(' ) */ MDPAREN; else if (c == '|' && c2 == '&') c = COPROC; else ungetsc(c2); } else if (c == '\n') { gethere(false); if (cf & CONTIN) goto Again; } else if (c == '\0') /* need here strings at EOF */ gethere(true); return (c); } *wp++ = EOS; /* terminate word */ yylval.cp = Xclose(ws, wp); if (state == SWORD || state == SLETPAREN /* XXX ONEWORD? */ #ifndef MKSH_SMALL || state == SLETARRAY #endif ) return (LWORD); /* unget terminator */ ungetsc(c); /* * note: the alias-vs-function code below depends on several * interna: starting from here, source->str is not modified; * the way getsc() and ungetsc() operate; etc. */ /* copy word to unprefixed string ident */ sp = yylval.cp; dp = ident; if ((cf & HEREDELIM) && (sp[1] == '<')) while (dp < ident+IDENT) { if ((c = *sp++) == CHAR) *dp++ = *sp++; else if ((c != OQUOTE) && (c != CQUOTE)) break; } else while (dp < ident+IDENT && (c = *sp++) == CHAR) *dp++ = *sp++; /* Make sure the ident array stays '\0' padded */ memset(dp, 0, (ident+IDENT) - dp + 1); if (c != EOS) *ident = '\0'; /* word is not unquoted */ if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { struct tbl *p; uint32_t h = hash(ident); /* { */ if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) { afree(yylval.cp, ATEMP); return (p->val.i); } if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && (p->flag & ISSET)) { /* * this still points to the same character as the * ungetsc'd terminator from above */ const char *cp = source->str; /* prefer POSIX but not Korn functions over aliases */ while (*cp == ' ' || *cp == '\t') /* * this is like getsc() without skipping * over Source boundaries (including not * parsing ungetsc'd characters that got * pushed into an SREREAD) which is what * we want here anyway: find out whether * the alias name is followed by a POSIX * function definition (only the opening * parenthesis is checked though) */ ++cp; /* prefer functions over aliases */ if (*cp == '(' /*)*/) /* * delete alias upon encountering function * definition */ ktdelete(p); else { Source *s = source; while (s->flags & SF_HASALIAS) if (s->u.tblp == p) return (LWORD); else s = s->next; /* push alias expansion */ s = pushs(SALIAS, source->areap); s->start = s->str = p->val.s; s->u.tblp = p; s->flags |= SF_HASALIAS; s->next = source; if (source->type == SEOF) { /* prevent infinite recursion at EOS */ source->u.tblp = p; source->flags |= SF_HASALIAS; } source = s; afree(yylval.cp, ATEMP); goto Again; } } } return (LWORD); } static void gethere(bool iseof) { struct ioword **p; for (p = heres; p < herep; p++) if (iseof && (*p)->delim[1] != '<') /* only here strings at EOF */ return; else readhere(*p); herep = heres; } /* * read "<delim[1] == '<') { /* process the here string */ xp = iop->heredoc = evalstr(iop->delim, DOBLANK); c = strlen(xp) - 1; memmove(xp, xp + 1, c); xp[c] = '\n'; return; } eof = evalstr(iop->delim, 0); if (!(iop->flag & IOEVAL)) ignore_backslash_newline++; Xinit(xs, xp, 256, ATEMP); for (;;) { eofp = eof; skiptabs = iop->flag & IOSKIP; xpos = Xsavepos(xs, xp); while ((c = getsc()) != 0) { if (skiptabs) { if (c == '\t') continue; skiptabs = 0; } if (c != *eofp) break; Xcheck(xs, xp); Xput(xs, xp, c); eofp++; } /* Allow EOF here so commands with out trailing newlines * will work (eg, ksh -c '...', $(...), etc). */ if (*eofp == '\0' && (c == 0 || c == '\n')) { xp = Xrestpos(xs, xp, xpos); break; } ungetsc(c); while ((c = getsc()) != '\n') { if (c == 0) yyerror("here document '%s' unclosed\n", eof); Xcheck(xs, xp); Xput(xs, xp, c); } Xcheck(xs, xp); Xput(xs, xp, c); } Xput(xs, xp, '\0'); iop->heredoc = Xclose(xs, xp); if (!(iop->flag & IOEVAL)) ignore_backslash_newline--; } void yyerror(const char *fmt, ...) { va_list va; /* pop aliases and re-reads */ while (source->type == SALIAS || source->type == SREREAD) source = source->next; source->str = null; /* zap pending input */ error_prefix(true); va_start(va, fmt); shf_vfprintf(shl_out, fmt, va); va_end(va); errorfz(); } /* * input for yylex with alias expansion */ Source * pushs(int type, Area *areap) { Source *s; s = alloc(sizeof(Source), areap); s->type = type; s->str = null; s->start = NULL; s->line = 0; s->errline = 0; s->file = NULL; s->flags = 0; s->next = NULL; s->areap = areap; if (type == SFILE || type == SSTDIN) XinitN(s->xs, 256, s->areap); else memset(&s->xs, 0, sizeof(s->xs)); return (s); } static int getsc__(void) { Source *s = source; int c; getsc_again: while ((c = *s->str++) == 0) { s->str = NULL; /* return 0 for EOF by default */ switch (s->type) { case SEOF: s->str = null; return (0); case SSTDIN: case SFILE: getsc_line(s); break; case SWSTR: break; case SSTRING: break; case SWORDS: s->start = s->str = *s->u.strv++; s->type = SWORDSEP; break; case SWORDSEP: if (*s->u.strv == NULL) { s->start = s->str = "\n"; s->type = SEOF; } else { s->start = s->str = " "; s->type = SWORDS; } break; case SALIAS: if (s->flags & SF_ALIASEND) { /* pass on an unused SF_ALIAS flag */ source = s->next; source->flags |= s->flags & SF_ALIAS; s = source; } else if (*s->u.tblp->val.s && (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { source = s = s->next; /* pop source stack */ /* Note that this alias ended with a space, * enabling alias expansion on the following * word. */ s->flags |= SF_ALIAS; } else { /* At this point, we need to keep the current * alias in the source list so recursive * aliases can be detected and we also need * to return the next character. Do this * by temporarily popping the alias to get * the next character and then put it back * in the source list with the SF_ALIASEND * flag set. */ source = s->next; /* pop source stack */ source->flags |= s->flags & SF_ALIAS; c = getsc__(); if (c) { s->flags |= SF_ALIASEND; s->ugbuf[0] = c; s->ugbuf[1] = '\0'; s->start = s->str = s->ugbuf; s->next = source; source = s; } else { s = source; /* avoid reading eof twice */ s->str = NULL; break; } } continue; case SREREAD: if (s->start != s->ugbuf) /* yuck */ afree(s->u.freeme, ATEMP); source = s = s->next; continue; } if (s->str == NULL) { s->type = SEOF; s->start = s->str = null; return ('\0'); } if (s->flags & SF_ECHO) { shf_puts(s->str, shl_out); shf_flush(shl_out); } } /* check for UTF-8 byte order mark */ if (s->flags & SF_FIRST) { s->flags &= ~SF_FIRST; if (((unsigned char)c == 0xEF) && (((const unsigned char *)(s->str))[0] == 0xBB) && (((const unsigned char *)(s->str))[1] == 0xBF)) { s->str += 2; UTFMODE = 1; goto getsc_again; } } return (c); } static void getsc_line(Source *s) { char *xp = Xstring(s->xs, xp), *cp; int interactive = Flag(FTALKING) && s->type == SSTDIN; int have_tty = interactive && (s->flags & SF_TTY); /* Done here to ensure nothing odd happens when a timeout occurs */ XcheckN(s->xs, xp, LINE); *xp = '\0'; s->start = s->str = xp; if (have_tty && ksh_tmout) { ksh_tmout_state = TMOUT_READING; alarm(ksh_tmout); } if (have_tty && ( #if !MKSH_S_NOVI Flag(FVI) || #endif Flag(FEMACS) || Flag(FGMACS))) { int nread; nread = x_read(xp, LINE); if (nread < 0) /* read error */ nread = 0; xp[nread] = '\0'; xp += nread; } else { if (interactive) { pprompt(prompt, 0); } else s->line++; while (1) { char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); if (!p && shf_error(s->u.shf) && shf_errno(s->u.shf) == EINTR) { shf_clearerr(s->u.shf); if (trap) runtraps(0); continue; } if (!p || (xp = p, xp[-1] == '\n')) break; /* double buffer size */ xp++; /* move past null so doubling works... */ XcheckN(s->xs, xp, Xlength(s->xs, xp)); xp--; /* ...and move back again */ } /* flush any unwanted input so other programs/builtins * can read it. Not very optimal, but less error prone * than flushing else where, dealing with redirections, * etc. * todo: reduce size of shf buffer (~128?) if SSTDIN */ if (s->type == SSTDIN) shf_flush(s->u.shf); } /* XXX: temporary kludge to restore source after a * trap may have been executed. */ source = s; if (have_tty && ksh_tmout) { ksh_tmout_state = TMOUT_EXECUTING; alarm(0); } cp = Xstring(s->xs, xp); #ifndef MKSH_SMALL if (interactive && *cp == '!' && cur_prompt == PS1) { int linelen; linelen = Xlength(s->xs, xp); XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1); /* reload after potential realloc */ cp = Xstring(s->xs, xp); /* change initial '!' into space */ *cp = ' '; /* NUL terminate the current string */ *xp = '\0'; /* move the actual string forward */ memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1); xp += fc_e_n; /* prepend it with "fc -e -" */ memcpy(cp, fc_e_, fc_e_n); } #endif s->start = s->str = cp; strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); /* Note: if input is all nulls, this is not eof */ if (Xlength(s->xs, xp) == 0) { /* EOF */ if (s->type == SFILE) shf_fdclose(s->u.shf); s->str = NULL; } else if (interactive && *s->str && (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) histsave(&s->line, s->str, true, true); if (interactive) set_prompt(PS2, NULL); } void set_prompt(int to, Source *s) { cur_prompt = to; switch (to) { case PS1: /* command */ /* Substitute ! and !! here, before substitutions are done * so ! in expanded variables are not expanded. * NOTE: this is not what AT&T ksh does (it does it after * substitutions, POSIX doesn't say which is to be done. */ { struct shf *shf; char * volatile ps1; Area *saved_atemp; ps1 = str_val(global("PS1")); shf = shf_sopen(NULL, strlen(ps1) * 2, SHF_WR | SHF_DYNAMIC, NULL); while (*ps1) if (*ps1 != '!' || *++ps1 == '!') shf_putchar(*ps1++, shf); else shf_fprintf(shf, "%d", s ? s->line + 1 : 0); ps1 = shf_sclose(shf); saved_atemp = ATEMP; newenv(E_ERRH); if (sigsetjmp(e->jbuf, 0)) { prompt = safe_prompt; /* Don't print an error - assume it has already * been printed. Reason is we may have forked * to run a command and the child may be * unwinding its stack through this code as it * exits. */ } else { char *cp = substitute(ps1, 0); strdupx(prompt, cp, saved_atemp); } quitenv(NULL); } break; case PS2: /* command continuation */ prompt = str_val(global("PS2")); break; } } static int dopprompt(const char *cp, int ntruncate, bool doprint) { int columns = 0, lines = 0, indelimit = 0; char delimiter = 0; /* Undocumented AT&T ksh feature: * If the second char in the prompt string is \r then the first char * is taken to be a non-printing delimiter and any chars between two * instances of the delimiter are not considered to be part of the * prompt length */ if (*cp && cp[1] == '\r') { delimiter = *cp; cp += 2; } for (; *cp; cp++) { if (indelimit && *cp != delimiter) ; else if (*cp == '\n' || *cp == '\r') { lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); columns = 0; } else if (*cp == '\t') { columns = (columns | 7) + 1; } else if (*cp == '\b') { if (columns > 0) columns--; } else if (*cp == delimiter) indelimit = !indelimit; else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { const char *cp2; columns += utf_widthadj(cp, &cp2); if (doprint && (indelimit || (ntruncate < (x_cols * lines + columns)))) shf_write(cp, cp2 - cp, shl_out); cp = cp2 - /* loop increment */ 1; continue; } else columns++; if (doprint && (*cp != delimiter) && (indelimit || (ntruncate < (x_cols * lines + columns)))) shf_putc(*cp, shl_out); } if (doprint) shf_flush(shl_out); return (x_cols * lines + columns); } void pprompt(const char *cp, int ntruncate) { dopprompt(cp, ntruncate, true); } int promptlen(const char *cp) { return (dopprompt(cp, 0, false)); } /* Read the variable part of a ${...} expression (ie, up to but not including * the :[-+?=#%] or close-brace. */ static char * get_brace_var(XString *wsp, char *wp) { enum parse_state { PS_INITIAL, PS_SAW_HASH, PS_IDENT, PS_NUMBER, PS_VAR1, PS_END } state; char c; state = PS_INITIAL; while (1) { c = getsc(); /* State machine to figure out where the variable part ends. */ switch (state) { case PS_INITIAL: if (c == '#' || c == '!') { state = PS_SAW_HASH; break; } /* FALLTHRU */ case PS_SAW_HASH: if (ksh_isalphx(c)) state = PS_IDENT; else if (ksh_isdigit(c)) state = PS_NUMBER; else if (ctype(c, C_VAR1)) state = PS_VAR1; else state = PS_END; break; case PS_IDENT: if (!ksh_isalnux(c)) { state = PS_END; if (c == '[') { char *tmp, *p; if (!arraysub(&tmp)) yyerror("missing ]\n"); *wp++ = c; for (p = tmp; *p; ) { Xcheck(*wsp, wp); *wp++ = *p++; } afree(tmp, ATEMP); c = getsc(); /* the ] */ } } break; case PS_NUMBER: if (!ksh_isdigit(c)) state = PS_END; break; case PS_VAR1: state = PS_END; break; case PS_END: /* keep gcc happy */ break; } if (state == PS_END) { *wp++ = '\0'; /* end of variable part */ ungetsc(c); break; } Xcheck(*wsp, wp); *wp++ = c; } return (wp); } /* * Save an array subscript - returns true if matching bracket found, false * if eof or newline was found. * (Returned string double null terminated) */ static int arraysub(char **strp) { XString ws; char *wp; char c; int depth = 1; /* we are just past the initial [ */ Xinit(ws, wp, 32, ATEMP); do { c = getsc(); Xcheck(ws, wp); *wp++ = c; if (c == '[') depth++; else if (c == ']') depth--; } while (depth > 0 && c && c != '\n'); *wp++ = '\0'; *strp = Xclose(ws, wp); return (depth == 0 ? 1 : 0); } /* Unget a char: handles case when we are already at the start of the buffer */ static const char * ungetsc(int c) { if (backslash_skip) backslash_skip--; /* Don't unget eof... */ if (source->str == null && c == '\0') return (source->str); if (source->str > source->start) source->str--; else { Source *s; s = pushs(SREREAD, source->areap); s->ugbuf[0] = c; s->ugbuf[1] = '\0'; s->start = s->str = s->ugbuf; s->next = source; source = s; } return (source->str); } /* Called to get a char that isn't a \newline sequence. */ static int getsc_bn(void) { int c, c2; if (ignore_backslash_newline) return (getsc_()); if (backslash_skip == 1) { backslash_skip = 2; return (getsc_()); } backslash_skip = 0; while (1) { c = getsc_(); if (c == '\\') { if ((c2 = getsc_()) == '\n') /* ignore the \newline; get the next char... */ continue; ungetsc(c2); backslash_skip = 1; } return (c); } } static Lex_state * push_state_(State_info *si, Lex_state *old_end) { Lex_state *new = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP); new[0].ls_info.base = old_end; si->base = &new[0]; si->end = &new[STATE_BSIZE]; return (&new[1]); } static Lex_state * pop_state_(State_info *si, Lex_state *old_end) { Lex_state *old_base = si->base; si->base = old_end->ls_info.base - STATE_BSIZE; si->end = old_end->ls_info.base; afree(old_base, ATEMP); return (si->base + STATE_BSIZE - 1); } static int s_get(void) { return (getsc()); } static void s_put(int c) { ungetsc(c); }