implement “here strings”

This commit is contained in:
tg 2008-02-26 20:43:11 +00:00
parent 9b1504a221
commit 5ea53a15c7
5 changed files with 139 additions and 21 deletions

49
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.146 2008/02/25 00:58:24 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.147 2008/02/26 20:43:10 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R33 2008/02/24
@(#)MIRBSD KSH R33 2008/02/26
description:
Check version of shell.
category: pdksh
@ -1223,6 +1223,51 @@ expected-stdout:
h\b
done
---
name: heredoc-9a
description:
Check that here strings work.
stdin:
bar="bar
baz"
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<foo
$0 -c "tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<foo"
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<"$bar"
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<'$bar'
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<\$bar
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<-foo
expected-stdout:
sbb
sbb
one
onm
$one
$one
-sbb
---
name: heredoc-9b
description:
Check that a corner case of here strings works like bash.
stdin:
fnord=42
bar="bar
\$fnord baz"
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<$bar
expected-stdout:
one $sabeq onm
category: bash
---
name: heredoc-9c
description:
Check that a corner case of here strings works like ksh93 and zsh.
stdin:
fnord=42
bar="bar
\$fnord baz"
tr '[A-Za-z]' '[N-ZA-Mn-za-m]' <<<$bar
expected-stdout:
one
$sabeq onm
---
name: heredoc-quoting-unsubst
description:
Check for correct handling of quoted characters in

83
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.50 2008/02/26 20:35:24 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.51 2008/02/26 20:43:10 tg Exp $");
/*
* states while lexing word
@ -22,6 +22,7 @@ __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.50 2008/02/26 20:35:24 tg Exp $");
#define STBRACE 12 /* parsing ${..[#%]..} */
#define SLETARRAY 13 /* inside =( ), just copy */
#define SADELIM 14 /* like SBASE, looking for delimiter */
#define SHERESTRING 15 /* parsing <<< string */
/* Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state. */
@ -90,7 +91,7 @@ static int getsc_bn(void);
static char *get_brace_var(XString *, char *);
static int arraysub(char **);
static const char *ungetsc(int);
static void gethere(void);
static void gethere(bool);
static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);
@ -186,9 +187,21 @@ yylex(int cf)
/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
statep->ls_state = state;
/* check for here string */
if (state == SHEREDELIM) {
c = getsc();
if (c == '<') {
state = SHERESTRING;
goto accept_nonword;
}
ungetsc(c);
}
/* collect non-special or quoted characters to form word */
while (!((c = getsc()) == 0 ||
((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
ctype(c, C_LEX1)))) {
accept_nonword:
Xcheck(ws, wp);
switch (state) {
case SADELIM:
@ -663,6 +676,28 @@ yylex(int cf)
*wp++ = CHAR, *wp++ = c;
break;
case SHERESTRING: /* <<< delimiter */
if (c == '\\') {
c = getsc();
if (c) { /* trailing \ is lost */
*wp++ = QCHAR;
*wp++ = c;
}
Xstring(ws, wp)[0] = QCHAR;
} else if (c == '\'') {
PUSH_STATE(SSQUOTE);
*wp++ = OQUOTE;
ignore_backslash_newline++;
Xstring(ws, wp)[0] = QCHAR;
} else if (c == '"') {
state = statep->ls_state = SHEREDQUOTE;
*wp++ = OQUOTE;
} else {
*wp++ = CHAR;
*wp++ = c;
}
break;
case SHEREDELIM: /* <<,<<- delimiter */
/* XXX chuck this state (and the next) - use
* the existing states ($ and \`..` should be
@ -694,7 +729,9 @@ yylex(int cf)
case SHEREDQUOTE: /* " in <<,<<- delimiter */
if (c == '"') {
*wp++ = CQUOTE;
state = statep->ls_state = SHEREDELIM;
state = statep->ls_state =
Xstring(ws, wp)[1] == '<' ?
SHERESTRING : SHEREDELIM;
} else {
if (c == '\\') {
switch (c = getsc()) {
@ -739,7 +776,7 @@ yylex(int cf)
yyerror("%s: ')' missing\n", T_synerr);
/* This is done to avoid tests for SHEREDELIM wherever SBASE is tested */
if (state == SHEREDELIM)
if (state == SHEREDELIM || state == SHERESTRING)
state = SBASE;
dp = Xstring(ws, wp);
@ -801,10 +838,12 @@ yylex(int cf)
else
ungetsc(c2);
} else if (c == '\n') {
gethere();
gethere(false);
if (cf & CONTIN)
goto Again;
}
} else if (c == '\0')
/* need here strings at EOF */
gethere(true);
return (c);
}
@ -818,8 +857,19 @@ yylex(int cf)
ungetsc(c); /* unget terminator */
/* copy word to unprefixed string ident */
for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
*dp++ = *sp++;
sp = yylval.cp;
dp = ident;
if ((cf & HEREDELIM) && (sp[1] == '<'))
while (dp < ident+IDENT)
if ((c = *sp++) == CHAR)
*dp++ = *sp++;
else if ((c == OQUOTE) || (c == CQUOTE))
;
else
break;
else
while (dp < ident+IDENT && (c = *sp++) == CHAR)
*dp++ = *sp++;
/* Make sure the ident array stays '\0' padded */
memset(dp, 0, (ident+IDENT) - dp + 1);
if (c != EOS)
@ -862,12 +912,15 @@ yylex(int cf)
}
static void
gethere(void)
gethere(bool iseof)
{
struct ioword **p;
for (p = heres; p < herep; p++)
readhere(*p);
if (iseof && (*p)->delim[1] != '<')
return;
else
readhere(*p);
herep = heres;
}
@ -886,6 +939,14 @@ readhere(struct ioword *iop)
char *xp;
int xpos;
if (iop->delim[1] == '<') {
xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
c = strlen(xp) - 1;
memmove(xp, xp + 1, c);
xp[c] = '\n';
return;
}
eof = evalstr(iop->delim, 0);
if (!(iop->flag & IOEVAL))

13
mksh.1
View File

@ -1,7 +1,7 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.106 2008/02/25 00:58:26 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.107 2008/02/26 20:43:11 tg Exp $
.\" $OpenBSD: ksh.1,v 1.120 2007/05/31 20:47:44 otto Exp $
.\"
.Dd February 24, 2008
.Dd February 26, 2008
.Dt MKSH 1
.Os MirBSD
.Sh NAME
@ -216,6 +216,7 @@ The meta-characters are used in building the following
.Ql \*(Lt ,
.Ql \*(Lt& ,
.Ql \*(Lt\*(Lt ,
.Ql \*(Lt\*(Lt\*(Lt ,
.Ql \*(Gt ,
.Ql \*(Gt& ,
.Ql \*(Gt\*(Gt ,
@ -1921,6 +1922,14 @@ order.
Same as
.Ic \*(Lt\*(Lt ,
except leading tabs are stripped from lines in the here document.
.It \*(Lt\*(Lt\*(Lt Ar word
Same as
.Ic \*(Lt\*(Lt ,
except that
.Ar word
.Em is
the here document.
This is called a here string.
.It \*(Lt& Ar fd
Standard input is duplicated from file descriptor
.Ar fd .

4
sh.h
View File

@ -8,8 +8,8 @@
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.186 2008/02/26 20:35:25 tg Exp $"
#define MKSH_VERSION "R33 2008/02/24"
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.187 2008/02/26 20:43:11 tg Exp $"
#define MKSH_VERSION "R33 2008/02/26"
#if HAVE_SYS_PARAM_H
#include <sys/param.h>

11
tree.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.12 2007/10/25 15:19:16 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.13 2008/02/26 20:43:11 tg Exp $");
#define INDENT 4
@ -166,7 +166,8 @@ ptree(struct op *t, int indent, struct shf *shf)
struct ioword *iop = *ioact++;
/* heredoc is 0 when tracing (set -x) */
if ((iop->flag & IOTYPE) == IOHERE && iop->heredoc) {
if ((iop->flag & IOTYPE) == IOHERE && iop->heredoc &&
(!iop->delim || iop->delim[1] != '<')) {
tputc('\n', shf);
shf_puts(iop->heredoc, shf);
fptreef(shf, indent, "%s",
@ -203,9 +204,9 @@ pioact(struct shf *shf, int indent, struct ioword *iop)
break;
case IOHERE:
if (flag&IOSKIP)
fptreef(shf, indent, "<<- ");
fptreef(shf, indent, "<<-");
else
fptreef(shf, indent, "<< ");
fptreef(shf, indent, "<<");
break;
case IOCAT:
fptreef(shf, indent, ">> ");
@ -228,6 +229,8 @@ pioact(struct shf *shf, int indent, struct ioword *iop)
}
/* name/delim are 0 when printing syntax errors */
if (type == IOHERE) {
if ((flag & IOSKIP) || (iop->delim[1] != '<'))
tputc(' ', shf);
if (iop->delim)
fptreef(shf, indent, "%S ", iop->delim);
} else if (iop->name)