• speed optimisation: drop the SF_FIRST flag and factor out skipping the
  UTF-8 BOM instead (UTFMODE now has a separate value for the case where
  it was activated during BOM skipping)
• parsing a COMSUB now skips UTF-8 BOM, too, but only temporarily
This commit is contained in:
tg 2011-03-13 16:03:54 +00:00
parent e66eaab861
commit af53a7d16a
6 changed files with 54 additions and 35 deletions

11
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.424 2011/03/13 15:57:21 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.425 2011/03/13 16:03:49 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R39 2011/03/12
@(#)MIRBSD KSH R39 2011/03/13
description:
Check version of shell.
stdin:
@ -5373,7 +5373,7 @@ expected-stdout:
mit
ohne
=
: mit
: ohne
---
name: utf8bom-2
description:
@ -5404,12 +5404,17 @@ expected-stderr-pattern:
name: utf8bom-3
description:
Reading the UTF-8 BOM should enable the utf8-mode flag
(temporarily for COMSUBs)
stdin:
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi'
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi'
"$__progname" -c 'if [[ $- = *U* ]]; then echo 3 on; else echo 3 off; fi; x=$(:; if [[ $- = *U* ]]; then echo 4 on; else echo 4 off; fi); echo $x; if [[ $- = *U* ]]; then echo 5 on; else echo 5 off; fi'
expected-stdout:
1 off
2 on
3 off
4 on
5 off
---
name: utf8opt-1a
description:

6
eval.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.99 2011/03/13 01:20:17 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.100 2011/03/13 16:03:50 tg Exp $");
/*
* string expansion
@ -1182,11 +1182,12 @@ comsub(Expand *xp, const char *cp)
Source *s, *sold;
struct op *t;
struct shf *shf;
uint8_t old_utfmode = UTFMODE;
s = pushs(SSTRING, ATEMP);
s->start = s->str = cp;
sold = source;
t = compile(s);
t = compile(s, true);
afree(s, ATEMP);
source = sold;
@ -1224,6 +1225,7 @@ comsub(Expand *xp, const char *cp)
xp->split = 1;
}
UTFMODE = old_utfmode;
xp->u.shf = shf;
return (XCOM);
}

46
lex.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.138 2011/03/13 15:57:23 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.139 2011/03/13 16:03:51 tg Exp $");
/*
* states while lexing word
@ -103,6 +103,7 @@ static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);
static int dopprompt(const char *, int, bool);
void yyskiputf8bom(void);
static int backslash_skip;
static int ignore_backslash_newline;
@ -110,12 +111,10 @@ static struct sretrace_info *retrace_info = NULL;
short comsub_nesting_level = 0;
/* optimised getsc_bn() */
#define _getsc() (*source->str != '\0' && *source->str != '\\' \
&& !backslash_skip && !(source->flags & SF_FIRST) \
? *source->str++ : getsc_bn())
#define _getsc() (*source->str != '\0' && *source->str != '\\' && \
!backslash_skip ? *source->str++ : getsc_bn())
/* optimised getsc__() */
#define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \
? *source->str++ : getsc__())
#define _getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
/* retrace helper */
#define _getsc_r(carg) { \
@ -1293,7 +1292,6 @@ getsc__(void)
Source *s = source;
int c;
getsc_again:
while ((c = *s->str++) == 0) {
/* return 0 for EOF by default */
s->str = NULL;
@ -1390,17 +1388,6 @@ getsc__(void)
shf_flush(shl_out);
}
}
/* check for UTF-8 byte order mark */
if (s->flags & SF_FIRST) {
s->flags &= ~SF_FIRST;
if (((unsigned char)c == 0xEF) &&
(((const unsigned char *)(s->str))[0] == 0xBB) &&
(((const unsigned char *)(s->str))[1] == 0xBF)) {
s->str += 2;
UTFMODE = 1;
goto getsc_again;
}
}
return (c);
}
@ -1799,6 +1786,29 @@ getsc_bn(void)
}
}
/*
 * Consume a UTF-8 byte order mark (EF BB BF) at the current read
 * position, if there is one.
 *
 * When all three bytes match they stay consumed and the value 8 is
 * OR'ed into UTFMODE.  On the first mismatch everything read so far is
 * handed back through ungetsc_(): first the byte that did not match,
 * then the already-matched BOM bytes in reverse order of reading.
 */
void
yyskiputf8bom(void)
{
	static const unsigned char bom[3] = { 0xEF, 0xBB, 0xBF };
	int ch;
	int matched;

	for (matched = 0; matched < 3; matched++) {
		ch = _getsc_();
		if ((unsigned char)ch == bom[matched])
			continue;

		/* not a BOM: undo every read, most recent byte first */
		ungetsc_(ch);
		while (matched > 0)
			ungetsc_(bom[--matched]);
		return;
	}

	UTFMODE |= 8;
}
static Lex_state *
push_state_(State_info *si, Lex_state *old_end)
{

9
main.c
View File

@ -33,7 +33,7 @@
#include <locale.h>
#endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.180 2011/03/13 01:20:21 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.181 2011/03/13 16:03:52 tg Exp $");
extern char **environ;
@ -663,12 +663,10 @@ shell(Source * volatile s, volatile int toplevel)
volatile int wastty = s->flags & SF_TTY;
volatile int attempts = 13;
volatile int interactive = Flag(FTALKING) && toplevel;
volatile bool sfirst = true;
Source *volatile old_source = source;
int i;
/* enable UTF-8 BOM check */
s->flags |= SF_FIRST;
newenv(E_PARSE);
if (interactive)
really_exit = 0;
@ -730,7 +728,8 @@ shell(Source * volatile s, volatile int toplevel)
j_notify();
set_prompt(PS1, s);
}
t = compile(s);
t = compile(s, sfirst);
sfirst = false;
if (t != NULL && t->type == TEOF) {
if (wastty && Flag(FIGNOREEOF) && --attempts > 0) {
shellf("Use 'exit' to leave mksh\n");

9
sh.h
View File

@ -154,9 +154,9 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.444 2011/03/13 01:20:22 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.445 2011/03/13 16:03:53 tg Exp $");
#endif
#define MKSH_VERSION "R39 2011/03/12"
#define MKSH_VERSION "R39 2011/03/13"
#ifndef MKSH_INCLUDES_ONLY
@ -1353,8 +1353,7 @@ struct source {
#define SF_ALIAS BIT(1) /* faking space at end of alias */
#define SF_ALIASEND BIT(2) /* faking space at end of alias */
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */
#define SF_HASALIAS BIT(5) /* u.tblp valid (SALIAS, SEOF) */
#define SF_HASALIAS BIT(4) /* u.tblp valid (SALIAS, SEOF) */
typedef union {
int i;
@ -1741,7 +1740,7 @@ int shf_vfprintf(struct shf *, const char *, va_list)
MKSH_A_FORMAT(printf, 2, 0);
/* syn.c */
void initkeywords(void);
struct op *compile(Source *);
struct op *compile(Source *, bool);
bool parse_usec(const char *, struct timeval *);
char *yyrecursive(void);
/* tree.c */

8
syn.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.56 2011/03/13 01:20:24 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.57 2011/03/13 16:03:54 tg Exp $");
struct nesting_state {
int start_token; /* token than began nesting (eg, FOR) */
@ -875,12 +875,16 @@ newtp(int type)
}
/*
 * Parse input from the given source and return outtree.
 *
 * Clears nesting.start_token and nesting.start_line, resets herep to
 * heres and makes s the current source.  If skiputf8bom is true,
 * yyskiputf8bom() is called before yyparse() runs.
 */
struct op *
compile(Source *s)
compile(Source *s, bool skiputf8bom)
{
/* defined in lex.c */
extern void yyskiputf8bom(void);
nesting.start_token = 0;
nesting.start_line = 0;
herep = heres;
source = s;
/* must happen before yyparse() starts reading from the source */
if (skiputf8bom)
yyskiputf8bom();
yyparse();
return (outtree);
}