diff --git a/check.t b/check.t index d9eaefd..6f75195 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.424 2011/03/13 15:57:21 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.425 2011/03/13 16:03:49 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -25,7 +25,7 @@ # http://www.research.att.com/~gsf/public/ifs.sh expected-stdout: - @(#)MIRBSD KSH R39 2011/03/12 + @(#)MIRBSD KSH R39 2011/03/13 description: Check version of shell. stdin: @@ -5373,7 +5373,7 @@ expected-stdout: mit ohne = - : mit + : ohne --- name: utf8bom-2 description: @@ -5404,12 +5404,17 @@ expected-stderr-pattern: name: utf8bom-3 description: Reading the UTF-8 BOM should enable the utf8-mode flag + (temporarily for COMSUBs) stdin: "$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi' "$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi' + "$__progname" -c 'if [[ $- = *U* ]]; then echo 3 on; else echo 3 off; fi; x=$(:; if [[ $- = *U* ]]; then echo 4 on; else echo 4 off; fi); echo $x; if [[ $- = *U* ]]; then echo 5 on; else echo 5 off; fi' expected-stdout: 1 off 2 on + 3 off + 4 on + 5 off --- name: utf8opt-1a description: diff --git a/eval.c b/eval.c index 6e69cdc..b693d4d 100644 --- a/eval.c +++ b/eval.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.99 2011/03/13 01:20:17 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.100 2011/03/13 16:03:50 tg Exp $"); /* * string expansion @@ -1182,11 +1182,12 @@ comsub(Expand *xp, const char *cp) Source *s, *sold; struct op *t; struct shf *shf; + uint8_t old_utfmode = UTFMODE; s = pushs(SSTRING, ATEMP); s->start = s->str = cp; sold = source; - t = compile(s); + t = compile(s, true); afree(s, ATEMP); source = sold; @@ -1224,6 +1225,7 @@ comsub(Expand *xp, const char *cp) xp->split = 1; } + UTFMODE = old_utfmode; xp->u.shf = shf; return (XCOM); } diff --git a/lex.c b/lex.c index 8696797..597dead 100644 --- a/lex.c +++ b/lex.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.138 2011/03/13 15:57:23 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.139 2011/03/13 16:03:51 tg Exp $"); /* * states while lexing word @@ -103,6 +103,7 @@ static Lex_state *push_state_(State_info *, Lex_state *); static Lex_state *pop_state_(State_info *, Lex_state *); static int dopprompt(const char *, int, bool); +void yyskiputf8bom(void); static int backslash_skip; static int ignore_backslash_newline; @@ -110,12 +111,10 @@ static struct sretrace_info *retrace_info = NULL; short comsub_nesting_level = 0; /* optimised getsc_bn() */ -#define _getsc() (*source->str != '\0' && *source->str != '\\' \ - && !backslash_skip && !(source->flags & SF_FIRST) \ - ? *source->str++ : getsc_bn()) +#define _getsc() (*source->str != '\0' && *source->str != '\\' && \ + !backslash_skip ? *source->str++ : getsc_bn()) /* optimised getsc__() */ -#define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \ - ? *source->str++ : getsc__()) +#define _getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) /* retrace helper */ #define _getsc_r(carg) { \ @@ -1293,7 +1292,6 @@ getsc__(void) Source *s = source; int c; - getsc_again: while ((c = *s->str++) == 0) { /* return 0 for EOF by default */ s->str = NULL; @@ -1390,17 +1388,6 @@ getsc__(void) shf_flush(shl_out); } } - /* check for UTF-8 byte order mark */ - if (s->flags & SF_FIRST) { - s->flags &= ~SF_FIRST; - if (((unsigned char)c == 0xEF) && - (((const unsigned char *)(s->str))[0] == 0xBB) && - (((const unsigned char *)(s->str))[1] == 0xBF)) { - s->str += 2; - UTFMODE = 1; - goto getsc_again; - } - } return (c); } @@ -1799,6 +1786,29 @@ getsc_bn(void) } } +void +yyskiputf8bom(void) +{ + int c; + + if ((unsigned char)(c = _getsc_()) != 0xEF) { + ungetsc_(c); + return; + } + if ((unsigned char)(c = _getsc_()) != 0xBB) { + ungetsc_(c); + ungetsc_(0xEF); + return; + } + if ((unsigned char)(c = _getsc_()) != 0xBF) { + ungetsc_(c); + ungetsc_(0xBB); + ungetsc_(0xEF); + return; + } + UTFMODE |= 8; +} + static Lex_state * push_state_(State_info *si, Lex_state *old_end) { diff --git a/main.c b/main.c index 5bc15ce..30e9e44 100644 --- a/main.c +++ b/main.c @@ -33,7 +33,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.180 2011/03/13 01:20:21 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.181 2011/03/13 16:03:52 tg Exp $"); extern char **environ; @@ -663,12 +663,10 @@ shell(Source * volatile s, volatile int toplevel) volatile int wastty = s->flags & SF_TTY; volatile int attempts = 13; volatile int interactive = Flag(FTALKING) && toplevel; + volatile bool sfirst = true; Source *volatile old_source = source; int i; - /* enable UTF-8 BOM check */ - s->flags |= SF_FIRST; - newenv(E_PARSE); if (interactive) really_exit = 0; @@ -730,7 +728,8 @@ shell(Source * volatile s, volatile int toplevel) j_notify(); set_prompt(PS1, s); } - t = compile(s); + t = compile(s, sfirst); + sfirst = false; if (t != NULL && t->type == TEOF) { if (wastty && Flag(FIGNOREEOF) && --attempts > 0) { shellf("Use 'exit' to leave mksh\n"); diff --git a/sh.h b/sh.h index 516f929..ec844c1 100644 --- a/sh.h +++ b/sh.h @@ -154,9 +154,9 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.444 2011/03/13 01:20:22 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.445 2011/03/13 16:03:53 tg Exp $"); #endif -#define MKSH_VERSION "R39 2011/03/12" +#define MKSH_VERSION "R39 2011/03/13" #ifndef MKSH_INCLUDES_ONLY @@ -1353,8 +1353,7 @@ struct source { #define SF_ALIAS BIT(1) /* faking space at end of alias */ #define SF_ALIASEND BIT(2) /* faking space at end of alias */ #define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */ -#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */ -#define SF_HASALIAS BIT(5) /* u.tblp valid (SALIAS, SEOF) */ +#define SF_HASALIAS BIT(4) /* u.tblp valid (SALIAS, SEOF) */ typedef union { int i; @@ -1741,7 +1740,7 @@ int shf_vfprintf(struct shf *, const char *, va_list) MKSH_A_FORMAT(printf, 2, 0); /* syn.c */ void initkeywords(void); -struct op *compile(Source *); +struct op *compile(Source *, bool); bool parse_usec(const char *, struct timeval *); char *yyrecursive(void); /* tree.c */ diff --git a/syn.c b/syn.c index 75f4b99..ca398ce 100644 --- a/syn.c +++ b/syn.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.56 2011/03/13 01:20:24 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.57 2011/03/13 16:03:54 tg Exp $"); struct nesting_state { int start_token; /* token than began nesting (eg, FOR) */ @@ -875,12 +875,16 @@ newtp(int type) } struct op * -compile(Source *s) +compile(Source *s, bool skiputf8bom) { + extern void yyskiputf8bom(void); + nesting.start_token = 0; nesting.start_line = 0; herep = heres; source = s; + if (skiputf8bom) + yyskiputf8bom(); yyparse(); return (outtree); }