mksh/lex.c
tg 4392aced96 the capability to get array indicēs (keys), Korn shell style
bash4 doesn’t have it at all, despite knowing associative arrays
zsh does it………… differently and weird

this is for indexed arrays, as mksh doesn’t have associative arrays
but it should help ☺
2009-08-28 22:39:09 +00:00

1607 lines
36 KiB
C

/* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */
/*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009
* Thorsten Glaser <tg@mirbsd.org>
*
* Provided that these terms and disclaimer and all copyright notices
* are retained or reproduced in an accompanying document, permission
* is granted to deal in this work without restriction, including un-
* limited rights to use, publicly perform, distribute, sell, modify,
* merge, give away, or sublicence.
*
* This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
* the utmost extent permitted by applicable law, neither express nor
* implied; without malicious intent or gross negligence. In no event
* may a licensor, author or contributor be held liable for indirect,
* direct, other damage, loss, or other issues arising in any way out
* of dealing in the work, even if advised of the possibility of such
* damage or existence of a defect, except proven that it results out
* of said person's immediate fault when using the work as intended.
*/
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.93 2009/08/28 22:39:09 tg Exp $");
/*
* states while lexing word
*/
#define SBASE 0 /* outside any lexical constructs */
#define SWORD 1 /* implicit quoting for substitute() */
#define SLETPAREN 2 /* inside (( )), implicit quoting */
#define SSQUOTE 3 /* inside '' */
#define SDQUOTE 4 /* inside "" */
#define SBRACE 5 /* inside ${} */
#define SCSPAREN 6 /* inside $() */
#define SBQUOTE 7 /* inside `` */
#define SASPAREN 8 /* inside $(( )) */
#define SHEREDELIM 9 /* parsing <<,<<- delimiter */
#define SHEREDQUOTE 10 /* parsing " in <<,<<- delimiter */
#define SPATTERN 11 /* parsing *(...|...) pattern (*+?@!) */
#define STBRACE 12 /* parsing ${..[#%]..} */
#define SLETARRAY 13 /* inside =( ), just copy */
#define SADELIM 14 /* like SBASE, looking for delimiter */
#define SHERESTRING 15 /* parsing <<< string */
/* Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state. */
typedef struct lex_state Lex_state;
struct lex_state {
int ls_state;
union {
/* $(...) */
struct scsparen_info {
int nparen; /* count open parenthesis */
int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
} u_scsparen;
/* $((...)) */
struct sasparen_info {
int nparen; /* count open parenthesis */
int start; /* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
} u_sasparen;
/* ((...)) */
struct sletparen_info {
int nparen; /* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
} u_sletparen;
/* `...` */
struct sbquote_info {
int indquotes; /* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
} u_sbquote;
/* =(...) */
struct sletarray_info {
int nparen; /* count open parentheses */
#define ls_sletarray ls_info.u_sletarray
} u_sletarray;
/* ADELIM */
struct sadelim_info {
unsigned char nparen; /* count open parentheses */
#define SADELIM_BASH 0
#define SADELIM_MAKE 1
unsigned char style;
unsigned char delimiter;
unsigned char num;
unsigned char flags; /* ofs. into sadelim_flags[] */
#define ls_sadelim ls_info.u_sadelim
} u_sadelim;
Lex_state *base; /* used to point to next state block */
} ls_info;
};
typedef struct {
Lex_state *base;
Lex_state *end;
} State_info;
static void readhere(struct ioword *);
static int getsc__(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static char *get_brace_var(XString *, char *);
static int arraysub(char **);
static const char *ungetsc(int);
static void gethere(bool);
static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);
static int dopprompt(const char *, int, bool);
static int backslash_skip;
static int ignore_backslash_newline;
/* optimised getsc_bn() */
#define getsc() (*source->str != '\0' && *source->str != '\\' \
&& !backslash_skip && !(source->flags & SF_FIRST) \
? *source->str++ : getsc_bn())
/* optimised getsc__() */
#define getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \
? *source->str++ : getsc__())
#define STATE_BSIZE 32
#define PUSH_STATE(s) do { \
if (++statep == state_info.end) \
statep = push_state_(&state_info, statep); \
state = statep->ls_state = (s); \
} while (0)
#define POP_STATE() do { \
if (--statep == state_info.base) \
statep = pop_state_(&state_info, statep); \
state = statep->ls_state; \
} while (0)
/**
* Lexical analyser
*
* tokens are not regular expressions, they are LL(1).
* for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
* hence the state stack.
*/
int
yylex(int cf)
{
Lex_state states[STATE_BSIZE], *statep, *s2, *base;
State_info state_info;
int c, state;
XString ws; /* expandable output word */
char *wp; /* output word pointer */
char *sp, *dp;
int c2;
Again:
states[0].ls_state = -1;
states[0].ls_info.base = NULL;
statep = &states[1];
state_info.base = states;
state_info.end = &state_info.base[STATE_BSIZE];
Xinit(ws, wp, 64, ATEMP);
backslash_skip = 0;
ignore_backslash_newline = 0;
if (cf&ONEWORD)
state = SWORD;
else if (cf&LETEXPR) {
*wp++ = OQUOTE; /* enclose arguments in (double) quotes */
state = SLETPAREN;
statep->ls_sletparen.nparen = 0;
} else if (cf&LETARRAY) {
state = SLETARRAY;
statep->ls_sletarray.nparen = 0;
} else { /* normal lexing */
state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
while ((c = getsc()) == ' ' || c == '\t')
;
if (c == '#') {
ignore_backslash_newline++;
while ((c = getsc()) != '\0' && c != '\n')
;
ignore_backslash_newline--;
}
ungetsc(c);
}
if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
source->flags &= ~SF_ALIAS;
cf |= ALIAS;
}
/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
statep->ls_state = state;
/* check for here string */
if (state == SHEREDELIM) {
c = getsc();
if (c == '<') {
state = SHERESTRING;
while ((c = getsc()) == ' ' || c == '\t')
;
ungetsc(c);
c = '<';
goto accept_nonword;
}
ungetsc(c);
}
/* collect non-special or quoted characters to form word */
while (!((c = getsc()) == 0 ||
((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
ctype(c, C_LEX1)))) {
accept_nonword:
Xcheck(ws, wp);
switch (state) {
case SADELIM:
if (c == '(')
statep->ls_sadelim.nparen++;
else if (c == ')')
statep->ls_sadelim.nparen--;
else if (statep->ls_sadelim.nparen == 0 &&
(c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
*wp++ = ADELIM;
*wp++ = c;
if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
POP_STATE();
if (c == /*{*/ '}')
POP_STATE();
break;
}
/* FALLTHROUGH */
case SBASE:
if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
*wp = EOS; /* temporary */
if (is_wdvarname(Xstring(ws, wp), false)) {
char *p, *tmp;
if (arraysub(&tmp)) {
*wp++ = CHAR;
*wp++ = c;
for (p = tmp; *p; ) {
Xcheck(ws, wp);
*wp++ = CHAR;
*wp++ = *p++;
}
afree(tmp, ATEMP);
break;
} else {
Source *s;
s = pushs(SREREAD,
source->areap);
s->start = s->str =
s->u.freeme = tmp;
s->next = source;
source = s;
}
}
*wp++ = CHAR;
*wp++ = c;
break;
}
/* FALLTHRU */
Sbase1: /* includes *(...|...) pattern (*+?@!) */
if (c == '*' || c == '@' || c == '+' || c == '?' ||
c == '!') {
c2 = getsc();
if (c2 == '(' /*)*/ ) {
*wp++ = OPAT;
*wp++ = c;
PUSH_STATE(SPATTERN);
break;
}
ungetsc(c2);
}
/* FALLTHRU */
Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
switch (c) {
case '\\':
c = getsc();
if (c) /* trailing \ is lost */
*wp++ = QCHAR, *wp++ = c;
break;
case '\'':
*wp++ = OQUOTE;
ignore_backslash_newline++;
PUSH_STATE(SSQUOTE);
break;
case '"':
*wp++ = OQUOTE;
PUSH_STATE(SDQUOTE);
break;
default:
goto Subst;
}
break;
Subst:
switch (c) {
case '\\':
c = getsc();
switch (c) {
case '"':
if ((cf & HEREDOC))
goto heredocquote;
/* FALLTHROUGH */
case '\\':
case '$': case '`':
*wp++ = QCHAR, *wp++ = c;
break;
default:
heredocquote:
Xcheck(ws, wp);
if (c) { /* trailing \ is lost */
*wp++ = CHAR, *wp++ = '\\';
*wp++ = CHAR, *wp++ = c;
}
break;
}
break;
case '$':
c = getsc();
if (c == '(') /*)*/ {
c = getsc();
if (c == '(') /*)*/ {
PUSH_STATE(SASPAREN);
statep->ls_sasparen.nparen = 2;
statep->ls_sasparen.start =
Xsavepos(ws, wp);
*wp++ = EXPRSUB;
} else {
ungetsc(c);
PUSH_STATE(SCSPAREN);
statep->ls_scsparen.nparen = 1;
statep->ls_scsparen.csstate = 0;
*wp++ = COMSUB;
}
} else if (c == '{') /*}*/ {
*wp++ = OSUBST;
*wp++ = '{'; /*}*/
wp = get_brace_var(&ws, wp);
c = getsc();
/* allow :# and :% (ksh88 compat) */
if (c == ':') {
*wp++ = CHAR, *wp++ = c;
c = getsc();
if (c == ':') {
*wp++ = CHAR;
*wp++ = '0';
*wp++ = ADELIM;
*wp++ = ':';
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':';
statep->ls_sadelim.num = 1;
statep->ls_sadelim.nparen = 0;
break;
} else if (ksh_isdigit(c) ||
c == '('/*)*/ || c == ' ' ||
c == '$' /* XXX what else? */) {
/* substring subst. */
if (c != ' ') {
*wp++ = CHAR;
*wp++ = ' ';
}
ungetsc(c);
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':';
statep->ls_sadelim.num = 2;
statep->ls_sadelim.nparen = 0;
break;
}
} else if (c == '/') {
*wp++ = CHAR, *wp++ = c;
if ((c = getsc()) == '/') {
*wp++ = ADELIM;
*wp++ = c;
} else
ungetsc(c);
PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = '/';
statep->ls_sadelim.num = 1;
statep->ls_sadelim.nparen = 0;
break;
}
/* If this is a trim operation,
* treat (,|,) specially in STBRACE.
*/
if (ctype(c, C_SUBOP2)) {
ungetsc(c);
PUSH_STATE(STBRACE);
} else {
ungetsc(c);
PUSH_STATE(SBRACE);
}
} else if (ksh_isalphx(c)) {
*wp++ = OSUBST;
*wp++ = 'X';
do {
Xcheck(ws, wp);
*wp++ = c;
c = getsc();
} while (ksh_isalnux(c));
*wp++ = '\0';
*wp++ = CSUBST;
*wp++ = 'X';
ungetsc(c);
} else if (ctype(c, C_VAR1 | C_DIGIT)) {
Xcheck(ws, wp);
*wp++ = OSUBST;
*wp++ = 'X';
*wp++ = c;
*wp++ = '\0';
*wp++ = CSUBST;
*wp++ = 'X';
} else {
*wp++ = CHAR, *wp++ = '$';
ungetsc(c);
}
break;
case '`':
PUSH_STATE(SBQUOTE);
*wp++ = COMSUB;
/* Need to know if we are inside double quotes
* since sh/AT&T-ksh translate the \" to " in
* "`..\"..`".
* This is not done in posix mode (section
* 3.2.3, Double Quotes: "The backquote shall
* retain its special meaning introducing the
* other form of command substitution (see
* 3.6.3). The portion of the quoted string
* from the initial backquote and the
* characters up to the next backquote that
* is not preceded by a backslash (having
* escape characters removed) defines that
* command whose output replaces `...` when
* the word is expanded."
* Section 3.6.3, Command Substitution:
* "Within the backquoted style of command
* substitution, backslash shall retain its
* literal meaning, except when followed by
* $ ` \.").
*/
statep->ls_sbquote.indquotes = 0;
s2 = statep;
base = state_info.base;
while (1) {
for (; s2 != base; s2--) {
if (s2->ls_state == SDQUOTE) {
statep->ls_sbquote.indquotes = 1;
break;
}
}
if (s2 != base)
break;
if (!(s2 = s2->ls_info.base))
break;
base = s2-- - STATE_BSIZE;
}
break;
case QCHAR:
if (cf & LQCHAR) {
*wp++ = QCHAR;
*wp++ = getsc();
break;
}
/* FALLTHRU */
default:
*wp++ = CHAR, *wp++ = c;
}
break;
case SSQUOTE:
if (c == '\'') {
POP_STATE();
*wp++ = CQUOTE;
ignore_backslash_newline--;
} else
*wp++ = QCHAR, *wp++ = c;
break;
case SDQUOTE:
if (c == '"') {
POP_STATE();
*wp++ = CQUOTE;
} else
goto Subst;
break;
case SCSPAREN: /* $( .. ) */
/* todo: deal with $(...) quoting properly
* kludge to partly fake quoting inside $(..): doesn't
* really work because nested $(..) or ${..} inside
* double quotes aren't dealt with.
*/
switch (statep->ls_scsparen.csstate) {
case 0: /* normal */
switch (c) {
case '(':
statep->ls_scsparen.nparen++;
break;
case ')':
statep->ls_scsparen.nparen--;
break;
case '\\':
statep->ls_scsparen.csstate = 1;
break;
case '"':
statep->ls_scsparen.csstate = 2;
break;
case '\'':
statep->ls_scsparen.csstate = 4;
ignore_backslash_newline++;
break;
}
break;
case 1: /* backslash in normal mode */
case 3: /* backslash in double quotes */
--statep->ls_scsparen.csstate;
break;
case 2: /* double quotes */
if (c == '"')
statep->ls_scsparen.csstate = 0;
else if (c == '\\')
statep->ls_scsparen.csstate = 3;
break;
case 4: /* single quotes */
if (c == '\'') {
statep->ls_scsparen.csstate = 0;
ignore_backslash_newline--;
}
break;
}
if (statep->ls_scsparen.nparen == 0) {
POP_STATE();
*wp++ = 0; /* end of COMSUB */
} else
*wp++ = c;
break;
case SASPAREN: /* $(( .. )) */
/* todo: deal with $((...); (...)) properly */
/* XXX should nest using existing state machine
* (embed "..", $(...), etc.) */
if (c == '(')
statep->ls_sasparen.nparen++;
else if (c == ')') {
statep->ls_sasparen.nparen--;
if (statep->ls_sasparen.nparen == 1) {
/*(*/
if ((c2 = getsc()) == ')') {
POP_STATE();
*wp++ = 0; /* end of EXPRSUB */
break;
} else {
char *s;
ungetsc(c2);
/* mismatched parenthesis -
* assume we were really
* parsing a $(..) expression
*/
s = Xrestpos(ws, wp,
statep->ls_sasparen.start);
memmove(s + 1, s, wp - s);
*s++ = COMSUB;
*s = '('; /*)*/
wp++;
statep->ls_scsparen.nparen = 1;
statep->ls_scsparen.csstate = 0;
state = statep->ls_state =
SCSPAREN;
}
}
}
*wp++ = c;
break;
case SBRACE:
/*{*/
if (c == '}') {
POP_STATE();
*wp++ = CSUBST;
*wp++ = /*{*/ '}';
} else
goto Sbase1;
break;
case STBRACE:
/* Same as SBRACE, except (,|,) treated specially */
/*{*/
if (c == '}') {
POP_STATE();
*wp++ = CSUBST;
*wp++ = /*{*/ '}';
} else if (c == '|') {
*wp++ = SPAT;
} else if (c == '(') {
*wp++ = OPAT;
*wp++ = ' '; /* simile for @ */
PUSH_STATE(SPATTERN);
} else
goto Sbase1;
break;
case SBQUOTE:
if (c == '`') {
*wp++ = 0;
POP_STATE();
} else if (c == '\\') {
switch (c = getsc()) {
case '\\':
case '$': case '`':
*wp++ = c;
break;
case '"':
if (statep->ls_sbquote.indquotes) {
*wp++ = c;
break;
}
/* FALLTHRU */
default:
if (c) { /* trailing \ is lost */
*wp++ = '\\';
*wp++ = c;
}
break;
}
} else
*wp++ = c;
break;
case SWORD: /* ONEWORD */
goto Subst;
case SLETPAREN: /* LETEXPR: (( ... )) */
/*(*/
if (c == ')') {
if (statep->ls_sletparen.nparen > 0)
--statep->ls_sletparen.nparen;
/*(*/
else if ((c2 = getsc()) == ')') {
c = 0;
*wp++ = CQUOTE;
goto Done;
} else
ungetsc(c2);
} else if (c == '(')
/* parenthesis inside quotes and backslashes
* are lost, but AT&T ksh doesn't count them
* either
*/
++statep->ls_sletparen.nparen;
goto Sbase2;
case SLETARRAY: /* LETARRAY: =( ... ) */
if (c == '('/*)*/)
++statep->ls_sletarray.nparen;
else if (c == /*(*/')')
if (statep->ls_sletarray.nparen-- == 0) {
c = 0;
goto Done;
}
*wp++ = CHAR, *wp++ = c;
break;
case SHERESTRING: /* <<< delimiter */
if (c == '\\') {
c = getsc();
if (c) { /* trailing \ is lost */
*wp++ = QCHAR;
*wp++ = c;
}
/* invoke quoting mode */
Xstring(ws, wp)[0] = QCHAR;
} else if (c == '\'') {
PUSH_STATE(SSQUOTE);
*wp++ = OQUOTE;
ignore_backslash_newline++;
/* invoke quoting mode */
Xstring(ws, wp)[0] = QCHAR;
} else if (c == '"') {
state = statep->ls_state = SHEREDQUOTE;
*wp++ = OQUOTE;
/* just don't IFS split; no quoting mode */
} else {
*wp++ = CHAR;
*wp++ = c;
}
break;
case SHEREDELIM: /* <<,<<- delimiter */
/* XXX chuck this state (and the next) - use
* the existing states ($ and \`..` should be
* stripped of their specialness after the
* fact).
*/
/* here delimiters need a special case since
* $ and `..` are not to be treated specially
*/
if (c == '\\') {
c = getsc();
if (c) { /* trailing \ is lost */
*wp++ = QCHAR;
*wp++ = c;
}
} else if (c == '\'') {
PUSH_STATE(SSQUOTE);
*wp++ = OQUOTE;
ignore_backslash_newline++;
} else if (c == '"') {
state = statep->ls_state = SHEREDQUOTE;
*wp++ = OQUOTE;
} else {
*wp++ = CHAR;
*wp++ = c;
}
break;
case SHEREDQUOTE: /* " in <<,<<- delimiter */
if (c == '"') {
*wp++ = CQUOTE;
state = statep->ls_state =
/* dp[1] == '<' means here string */
Xstring(ws, wp)[1] == '<' ?
SHERESTRING : SHEREDELIM;
} else {
if (c == '\\') {
switch (c = getsc()) {
case '\\': case '"':
case '$': case '`':
break;
default:
if (c) { /* trailing \ lost */
*wp++ = CHAR;
*wp++ = '\\';
}
break;
}
}
*wp++ = CHAR;
*wp++ = c;
}
break;
case SPATTERN: /* in *(...|...) pattern (*+?@!) */
if ( /*(*/ c == ')') {
*wp++ = CPAT;
POP_STATE();
} else if (c == '|') {
*wp++ = SPAT;
} else if (c == '(') {
*wp++ = OPAT;
*wp++ = ' '; /* simile for @ */
PUSH_STATE(SPATTERN);
} else
goto Sbase1;
break;
}
}
Done:
Xcheck(ws, wp);
if (statep != &states[1])
/* XXX figure out what is missing */
yyerror("no closing quote\n");
if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
yyerror("%s: ')' missing\n", T_synerr);
/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
if (state == SHEREDELIM || state == SHERESTRING)
state = SBASE;
dp = Xstring(ws, wp);
if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
if (Xlength(ws, wp) == 0)
iop->unit = c == '<' ? 0 : 1;
else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
if (dp[c2] != CHAR)
goto no_iop;
if (!ksh_isdigit(dp[c2 + 1]))
goto no_iop;
iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
}
if (iop->unit >= FDBASE)
goto no_iop;
if (c == '&') {
if ((c2 = getsc()) != '>') {
ungetsc(c2);
goto no_iop;
}
c = c2;
iop->flag = IOBASH;
} else
iop->flag = 0;
c2 = getsc();
/* <<, >>, <> are ok, >< is not */
if (c == c2 || (c == '<' && c2 == '>')) {
iop->flag |= c == c2 ?
(c == '>' ? IOCAT : IOHERE) : IORDWR;
if (iop->flag == IOHERE) {
if ((c2 = getsc()) == '-')
iop->flag |= IOSKIP;
else
ungetsc(c2);
}
} else if (c2 == '&')
iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
else {
iop->flag |= c == '>' ? IOWRITE : IOREAD;
if (c == '>' && c2 == '|')
iop->flag |= IOCLOB;
else
ungetsc(c2);
}
iop->name = NULL;
iop->delim = NULL;
iop->heredoc = NULL;
Xfree(ws, wp); /* free word */
yylval.iop = iop;
return (REDIR);
no_iop:
;
}
if (wp == dp && state == SBASE) {
Xfree(ws, wp); /* free word */
/* no word, process LEX1 character */
if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
if ((c2 = getsc()) == c)
c = (c == ';') ? BREAK :
(c == '|') ? LOGOR :
(c == '&') ? LOGAND :
/*
* this is the place where
* ((...); (...))
* and similar is broken
*/
/* c == '(' ) */ MDPAREN;
else if (c == '|' && c2 == '&')
c = COPROC;
else
ungetsc(c2);
} else if (c == '\n') {
gethere(false);
if (cf & CONTIN)
goto Again;
} else if (c == '\0')
/* need here strings at EOF */
gethere(true);
return (c);
}
*wp++ = EOS; /* terminate word */
yylval.cp = Xclose(ws, wp);
if (state == SWORD || state == SLETPAREN ||
state == SLETARRAY) /* ONEWORD? */
return (LWORD);
/* unget terminator */
ungetsc(c);
/*
* note: the alias-vs-function code below depends on several
* interna: starting from here, source->str is not modified;
* the way getsc() and ungetsc() operate; etc.
*/
/* copy word to unprefixed string ident */
sp = yylval.cp;
dp = ident;
if ((cf & HEREDELIM) && (sp[1] == '<'))
while (dp < ident+IDENT) {
if ((c = *sp++) == CHAR)
*dp++ = *sp++;
else if ((c != OQUOTE) && (c != CQUOTE))
break;
}
else
while (dp < ident+IDENT && (c = *sp++) == CHAR)
*dp++ = *sp++;
/* Make sure the ident array stays '\0' padded */
memset(dp, 0, (ident+IDENT) - dp + 1);
if (c != EOS)
*ident = '\0'; /* word is not unquoted */
if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
struct tbl *p;
uint32_t h = hash(ident);
/* { */
if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
(!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
afree(yylval.cp, ATEMP);
return (p->val.i);
}
if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
(p->flag & ISSET)) {
/*
* this still points to the same character as the
* ungetsc'd terminator from above
*/
const char *cp = source->str;
/* prefer POSIX but not Korn functions over aliases */
while (*cp == ' ' || *cp == '\t')
/*
* this is like getsc() without skipping
* over Source boundaries (including not
* parsing ungetsc'd characters that got
* pushed into an SREREAD) which is what
* we want here anyway: find out whether
* the alias name is followed by a POSIX
* function definition (only the opening
* parenthesis is checked though)
*/
++cp;
/* prefer functions over aliases */
if (*cp == '(' /*)*/)
/*
* delete alias upon encountering function
* definition
*/
ktdelete(p);
else {
Source *s = source;
while (s->flags & SF_HASALIAS)
if (s->u.tblp == p)
return (LWORD);
else
s = s->next;
/* push alias expansion */
s = pushs(SALIAS, source->areap);
s->start = s->str = p->val.s;
s->u.tblp = p;
s->flags |= SF_HASALIAS;
s->next = source;
if (source->type == SEOF) {
/* prevent infinite recursion at EOS */
source->u.tblp = p;
source->flags |= SF_HASALIAS;
}
source = s;
afree(yylval.cp, ATEMP);
goto Again;
}
}
}
return (LWORD);
}
static void
gethere(bool iseof)
{
struct ioword **p;
for (p = heres; p < herep; p++)
if (iseof && (*p)->delim[1] != '<')
/* only here strings at EOF */
return;
else
readhere(*p);
herep = heres;
}
/*
* read "<<word" text into temp file
*/
static void
readhere(struct ioword *iop)
{
int c;
char *volatile eof;
char *eofp;
int skiptabs;
XString xs;
char *xp;
int xpos;
if (iop->delim[1] == '<') {
/* process the here string */
xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
c = strlen(xp) - 1;
memmove(xp, xp + 1, c);
xp[c] = '\n';
return;
}
eof = evalstr(iop->delim, 0);
if (!(iop->flag & IOEVAL))
ignore_backslash_newline++;
Xinit(xs, xp, 256, ATEMP);
for (;;) {
eofp = eof;
skiptabs = iop->flag & IOSKIP;
xpos = Xsavepos(xs, xp);
while ((c = getsc()) != 0) {
if (skiptabs) {
if (c == '\t')
continue;
skiptabs = 0;
}
if (c != *eofp)
break;
Xcheck(xs, xp);
Xput(xs, xp, c);
eofp++;
}
/* Allow EOF here so commands with out trailing newlines
* will work (eg, ksh -c '...', $(...), etc).
*/
if (*eofp == '\0' && (c == 0 || c == '\n')) {
xp = Xrestpos(xs, xp, xpos);
break;
}
ungetsc(c);
while ((c = getsc()) != '\n') {
if (c == 0)
yyerror("here document '%s' unclosed\n", eof);
Xcheck(xs, xp);
Xput(xs, xp, c);
}
Xcheck(xs, xp);
Xput(xs, xp, c);
}
Xput(xs, xp, '\0');
iop->heredoc = Xclose(xs, xp);
if (!(iop->flag & IOEVAL))
ignore_backslash_newline--;
}
void
yyerror(const char *fmt, ...)
{
va_list va;
/* pop aliases and re-reads */
while (source->type == SALIAS || source->type == SREREAD)
source = source->next;
source->str = null; /* zap pending input */
error_prefix(true);
va_start(va, fmt);
shf_vfprintf(shl_out, fmt, va);
va_end(va);
errorfz();
}
/*
* input for yylex with alias expansion
*/
Source *
pushs(int type, Area *areap)
{
Source *s;
s = alloc(sizeof(Source), areap);
s->type = type;
s->str = null;
s->start = NULL;
s->line = 0;
s->errline = 0;
s->file = NULL;
s->flags = 0;
s->next = NULL;
s->areap = areap;
if (type == SFILE || type == SSTDIN)
XinitN(s->xs, 256, s->areap);
else
memset(&s->xs, 0, sizeof(s->xs));
return (s);
}
static int
getsc__(void)
{
Source *s = source;
int c;
getsc_again:
while ((c = *s->str++) == 0) {
s->str = NULL; /* return 0 for EOF by default */
switch (s->type) {
case SEOF:
s->str = null;
return (0);
case SSTDIN:
case SFILE:
getsc_line(s);
break;
case SWSTR:
break;
case SSTRING:
break;
case SWORDS:
s->start = s->str = *s->u.strv++;
s->type = SWORDSEP;
break;
case SWORDSEP:
if (*s->u.strv == NULL) {
s->start = s->str = "\n";
s->type = SEOF;
} else {
s->start = s->str = " ";
s->type = SWORDS;
}
break;
case SALIAS:
if (s->flags & SF_ALIASEND) {
/* pass on an unused SF_ALIAS flag */
source = s->next;
source->flags |= s->flags & SF_ALIAS;
s = source;
} else if (*s->u.tblp->val.s &&
(c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
source = s = s->next; /* pop source stack */
/* Note that this alias ended with a space,
* enabling alias expansion on the following
* word.
*/
s->flags |= SF_ALIAS;
} else {
/* At this point, we need to keep the current
* alias in the source list so recursive
* aliases can be detected and we also need
* to return the next character. Do this
* by temporarily popping the alias to get
* the next character and then put it back
* in the source list with the SF_ALIASEND
* flag set.
*/
source = s->next; /* pop source stack */
source->flags |= s->flags & SF_ALIAS;
c = getsc__();
if (c) {
s->flags |= SF_ALIASEND;
s->ugbuf[0] = c; s->ugbuf[1] = '\0';
s->start = s->str = s->ugbuf;
s->next = source;
source = s;
} else {
s = source;
/* avoid reading eof twice */
s->str = NULL;
break;
}
}
continue;
case SREREAD:
if (s->start != s->ugbuf) /* yuck */
afree(s->u.freeme, ATEMP);
source = s = s->next;
continue;
}
if (s->str == NULL) {
s->type = SEOF;
s->start = s->str = null;
return ('\0');
}
if (s->flags & SF_ECHO) {
shf_puts(s->str, shl_out);
shf_flush(shl_out);
}
}
/* check for UTF-8 byte order mark */
if (s->flags & SF_FIRST) {
s->flags &= ~SF_FIRST;
if (((unsigned char)c == 0xEF) &&
(((const unsigned char *)(s->str))[0] == 0xBB) &&
(((const unsigned char *)(s->str))[1] == 0xBF)) {
s->str += 2;
UTFMODE = 1;
goto getsc_again;
}
}
return (c);
}
static void
getsc_line(Source *s)
{
char *xp = Xstring(s->xs, xp), *cp;
int interactive = Flag(FTALKING) && s->type == SSTDIN;
int have_tty = interactive && (s->flags & SF_TTY);
/* Done here to ensure nothing odd happens when a timeout occurs */
XcheckN(s->xs, xp, LINE);
*xp = '\0';
s->start = s->str = xp;
if (have_tty && ksh_tmout) {
ksh_tmout_state = TMOUT_READING;
alarm(ksh_tmout);
}
if (have_tty && (
#ifndef MKSH_NOVI
Flag(FVI) ||
#endif
Flag(FEMACS) || Flag(FGMACS))) {
int nread;
nread = x_read(xp, LINE);
if (nread < 0) /* read error */
nread = 0;
xp[nread] = '\0';
xp += nread;
} else {
if (interactive) {
pprompt(prompt, 0);
} else
s->line++;
while (1) {
char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
if (!p && shf_error(s->u.shf) &&
shf_errno(s->u.shf) == EINTR) {
shf_clearerr(s->u.shf);
if (trap)
runtraps(0);
continue;
}
if (!p || (xp = p, xp[-1] == '\n'))
break;
/* double buffer size */
xp++; /* move past null so doubling works... */
XcheckN(s->xs, xp, Xlength(s->xs, xp));
xp--; /* ...and move back again */
}
/* flush any unwanted input so other programs/builtins
* can read it. Not very optimal, but less error prone
* than flushing else where, dealing with redirections,
* etc..
* todo: reduce size of shf buffer (~128?) if SSTDIN
*/
if (s->type == SSTDIN)
shf_flush(s->u.shf);
}
/* XXX: temporary kludge to restore source after a
* trap may have been executed.
*/
source = s;
if (have_tty && ksh_tmout) {
ksh_tmout_state = TMOUT_EXECUTING;
alarm(0);
}
cp = Xstring(s->xs, xp);
if (interactive && *cp == '!' && cur_prompt == PS1) {
int linelen;
linelen = Xlength(s->xs, xp);
XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
/* reload after potential realloc */
cp = Xstring(s->xs, xp);
/* change initial '!' into space */
*cp = ' ';
/* NUL terminate the current string */
*xp = '\0';
/* move the actual string forward */
memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
xp += fc_e_n;
/* prepend it with "fc -e -" */
memcpy(cp, fc_e_, fc_e_n);
}
s->start = s->str = cp;
strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
/* Note: if input is all nulls, this is not eof */
if (Xlength(s->xs, xp) == 0) { /* EOF */
if (s->type == SFILE)
shf_fdclose(s->u.shf);
s->str = NULL;
} else if (interactive && *s->str &&
(cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS)))
histsave(&s->line, s->str, true, true);
if (interactive)
set_prompt(PS2, NULL);
}
void
set_prompt(int to, Source *s)
{
cur_prompt = to;
switch (to) {
case PS1: /* command */
/* Substitute ! and !! here, before substitutions are done
* so ! in expanded variables are not expanded.
* NOTE: this is not what AT&T ksh does (it does it after
* substitutions, POSIX doesn't say which is to be done.
*/
{
struct shf *shf;
char * volatile ps1;
Area *saved_atemp;
ps1 = str_val(global("PS1"));
shf = shf_sopen(NULL, strlen(ps1) * 2,
SHF_WR | SHF_DYNAMIC, NULL);
while (*ps1)
if (*ps1 != '!' || *++ps1 == '!')
shf_putchar(*ps1++, shf);
else
shf_fprintf(shf, "%d",
s ? s->line + 1 : 0);
ps1 = shf_sclose(shf);
saved_atemp = ATEMP;
newenv(E_ERRH);
if (sigsetjmp(e->jbuf, 0)) {
prompt = safe_prompt;
/* Don't print an error - assume it has already
* been printed. Reason is we may have forked
* to run a command and the child may be
* unwinding its stack through this code as it
* exits.
*/
} else {
char *cp = substitute(ps1, 0);
strdupx(prompt, cp, saved_atemp);
}
quitenv(NULL);
}
break;
case PS2: /* command continuation */
prompt = str_val(global("PS2"));
break;
}
}
static int
dopprompt(const char *cp, int ntruncate, bool doprint)
{
int columns = 0, lines = 0, indelimit = 0;
char delimiter = 0;
/* Undocumented AT&T ksh feature:
* If the second char in the prompt string is \r then the first char
* is taken to be a non-printing delimiter and any chars between two
* instances of the delimiter are not considered to be part of the
* prompt length
*/
if (*cp && cp[1] == '\r') {
delimiter = *cp;
cp += 2;
}
for (; *cp; cp++) {
if (indelimit && *cp != delimiter)
;
else if (*cp == '\n' || *cp == '\r') {
lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
columns = 0;
} else if (*cp == '\t') {
columns = (columns | 7) + 1;
} else if (*cp == '\b') {
if (columns > 0)
columns--;
} else if (*cp == delimiter)
indelimit = !indelimit;
else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
const char *cp2;
columns += utf_widthadj(cp, &cp2);
if (doprint && (indelimit ||
(ntruncate < (x_cols * lines + columns))))
shf_write(cp, cp2 - cp, shl_out);
cp = cp2 - /* loop increment */ 1;
continue;
} else
columns++;
if (doprint && (*cp != delimiter) &&
(indelimit || (ntruncate < (x_cols * lines + columns))))
shf_putc(*cp, shl_out);
}
if (doprint)
shf_flush(shl_out);
return (x_cols * lines + columns);
}
void
pprompt(const char *cp, int ntruncate)
{
dopprompt(cp, ntruncate, true);
}
int
promptlen(const char *cp)
{
return (dopprompt(cp, 0, false));
}
/* Read the variable part of a ${...} expression (ie, up to but not including
* the :[-+?=#%] or close-brace.
*/
static char *
get_brace_var(XString *wsp, char *wp)
{
enum parse_state {
PS_INITIAL, PS_SAW_HASH, PS_IDENT,
PS_NUMBER, PS_VAR1, PS_END
} state;
char c;
state = PS_INITIAL;
while (1) {
c = getsc();
/* State machine to figure out where the variable part ends. */
switch (state) {
case PS_INITIAL:
if (c == '#' || c == '!') {
state = PS_SAW_HASH;
break;
}
/* FALLTHRU */
case PS_SAW_HASH:
if (ksh_isalphx(c))
state = PS_IDENT;
else if (ksh_isdigit(c))
state = PS_NUMBER;
else if (ctype(c, C_VAR1))
state = PS_VAR1;
else
state = PS_END;
break;
case PS_IDENT:
if (!ksh_isalnux(c)) {
state = PS_END;
if (c == '[') {
char *tmp, *p;
if (!arraysub(&tmp))
yyerror("missing ]\n");
*wp++ = c;
for (p = tmp; *p; ) {
Xcheck(*wsp, wp);
*wp++ = *p++;
}
afree(tmp, ATEMP);
c = getsc(); /* the ] */
}
}
break;
case PS_NUMBER:
if (!ksh_isdigit(c))
state = PS_END;
break;
case PS_VAR1:
state = PS_END;
break;
case PS_END: /* keep gcc happy */
break;
}
if (state == PS_END) {
*wp++ = '\0'; /* end of variable part */
ungetsc(c);
break;
}
Xcheck(*wsp, wp);
*wp++ = c;
}
return (wp);
}
/*
* Save an array subscript - returns true if matching bracket found, false
* if eof or newline was found.
* (Returned string double null terminated)
*/
static int
arraysub(char **strp)
{
XString ws;
char *wp;
char c;
int depth = 1; /* we are just past the initial [ */
Xinit(ws, wp, 32, ATEMP);
do {
c = getsc();
Xcheck(ws, wp);
*wp++ = c;
if (c == '[')
depth++;
else if (c == ']')
depth--;
} while (depth > 0 && c && c != '\n');
*wp++ = '\0';
*strp = Xclose(ws, wp);
return (depth == 0 ? 1 : 0);
}
/* Unget a char: handles case when we are already at the start of the buffer */
static const char *
ungetsc(int c)
{
if (backslash_skip)
backslash_skip--;
/* Don't unget eof... */
if (source->str == null && c == '\0')
return (source->str);
if (source->str > source->start)
source->str--;
else {
Source *s;
s = pushs(SREREAD, source->areap);
s->ugbuf[0] = c; s->ugbuf[1] = '\0';
s->start = s->str = s->ugbuf;
s->next = source;
source = s;
}
return (source->str);
}
/* Called to get a char that isn't a \newline sequence. */
static int
getsc_bn(void)
{
int c, c2;
if (ignore_backslash_newline)
return (getsc_());
if (backslash_skip == 1) {
backslash_skip = 2;
return (getsc_());
}
backslash_skip = 0;
while (1) {
c = getsc_();
if (c == '\\') {
if ((c2 = getsc_()) == '\n')
/* ignore the \newline; get the next char... */
continue;
ungetsc(c2);
backslash_skip = 1;
}
return (c);
}
}
static Lex_state *
push_state_(State_info *si, Lex_state *old_end)
{
Lex_state *new = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);
new[0].ls_info.base = old_end;
si->base = &new[0];
si->end = &new[STATE_BSIZE];
return (&new[1]);
}
static Lex_state *
pop_state_(State_info *si, Lex_state *old_end)
{
Lex_state *old_base = si->base;
si->base = old_end->ls_info.base - STATE_BSIZE;
si->end = old_end->ls_info.base;
afree(old_base, ATEMP);
return (si->base + STATE_BSIZE - 1);
}