• speed optimisation: drop the SF_FIRST flag and factor out skipping the
  UTF-8 BOM into a separate step instead (UTFMODE now has a separate
  value for when it was activated during BOM skipping)
• parsing a COMSUB now skips a UTF-8 BOM, too, but enables UTF-8 mode only temporarily
This commit is contained in:
tg 2011-03-13 16:03:54 +00:00
parent e66eaab861
commit af53a7d16a
6 changed files with 54 additions and 35 deletions

11
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.424 2011/03/13 15:57:21 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.425 2011/03/13 16:03:49 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh # http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R39 2011/03/12 @(#)MIRBSD KSH R39 2011/03/13
description: description:
Check version of shell. Check version of shell.
stdin: stdin:
@ -5373,7 +5373,7 @@ expected-stdout:
mit mit
ohne ohne
= =
: mit : ohne
--- ---
name: utf8bom-2 name: utf8bom-2
description: description:
@ -5404,12 +5404,17 @@ expected-stderr-pattern:
name: utf8bom-3 name: utf8bom-3
description: description:
Reading the UTF-8 BOM should enable the utf8-mode flag Reading the UTF-8 BOM should enable the utf8-mode flag
(temporarily for COMSUBs)
stdin: stdin:
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi' "$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi'
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi' "$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi'
"$__progname" -c 'if [[ $- = *U* ]]; then echo 3 on; else echo 3 off; fi; x=$(:; if [[ $- = *U* ]]; then echo 4 on; else echo 4 off; fi); echo $x; if [[ $- = *U* ]]; then echo 5 on; else echo 5 off; fi'
expected-stdout: expected-stdout:
1 off 1 off
2 on 2 on
3 off
4 on
5 off
--- ---
name: utf8opt-1a name: utf8opt-1a
description: description:

6
eval.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.99 2011/03/13 01:20:17 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.100 2011/03/13 16:03:50 tg Exp $");
/* /*
* string expansion * string expansion
@ -1182,11 +1182,12 @@ comsub(Expand *xp, const char *cp)
Source *s, *sold; Source *s, *sold;
struct op *t; struct op *t;
struct shf *shf; struct shf *shf;
uint8_t old_utfmode = UTFMODE;
s = pushs(SSTRING, ATEMP); s = pushs(SSTRING, ATEMP);
s->start = s->str = cp; s->start = s->str = cp;
sold = source; sold = source;
t = compile(s); t = compile(s, true);
afree(s, ATEMP); afree(s, ATEMP);
source = sold; source = sold;
@ -1224,6 +1225,7 @@ comsub(Expand *xp, const char *cp)
xp->split = 1; xp->split = 1;
} }
UTFMODE = old_utfmode;
xp->u.shf = shf; xp->u.shf = shf;
return (XCOM); return (XCOM);
} }

46
lex.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.138 2011/03/13 15:57:23 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.139 2011/03/13 16:03:51 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@ -103,6 +103,7 @@ static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *); static Lex_state *pop_state_(State_info *, Lex_state *);
static int dopprompt(const char *, int, bool); static int dopprompt(const char *, int, bool);
void yyskiputf8bom(void);
static int backslash_skip; static int backslash_skip;
static int ignore_backslash_newline; static int ignore_backslash_newline;
@ -110,12 +111,10 @@ static struct sretrace_info *retrace_info = NULL;
short comsub_nesting_level = 0; short comsub_nesting_level = 0;
/* optimised getsc_bn() */ /* optimised getsc_bn() */
#define _getsc() (*source->str != '\0' && *source->str != '\\' \ #define _getsc() (*source->str != '\0' && *source->str != '\\' && \
&& !backslash_skip && !(source->flags & SF_FIRST) \ !backslash_skip ? *source->str++ : getsc_bn())
? *source->str++ : getsc_bn())
/* optimised getsc__() */ /* optimised getsc__() */
#define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \ #define _getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
? *source->str++ : getsc__())
/* retrace helper */ /* retrace helper */
#define _getsc_r(carg) { \ #define _getsc_r(carg) { \
@ -1293,7 +1292,6 @@ getsc__(void)
Source *s = source; Source *s = source;
int c; int c;
getsc_again:
while ((c = *s->str++) == 0) { while ((c = *s->str++) == 0) {
/* return 0 for EOF by default */ /* return 0 for EOF by default */
s->str = NULL; s->str = NULL;
@ -1390,17 +1388,6 @@ getsc__(void)
shf_flush(shl_out); shf_flush(shl_out);
} }
} }
/* check for UTF-8 byte order mark */
if (s->flags & SF_FIRST) {
s->flags &= ~SF_FIRST;
if (((unsigned char)c == 0xEF) &&
(((const unsigned char *)(s->str))[0] == 0xBB) &&
(((const unsigned char *)(s->str))[1] == 0xBF)) {
s->str += 2;
UTFMODE = 1;
goto getsc_again;
}
}
return (c); return (c);
} }
@ -1799,6 +1786,29 @@ getsc_bn(void)
} }
} }
/*
 * Consume a UTF-8 byte order mark (EF BB BF) at the current read
 * position of the input, if one is there.
 *
 * On a complete match the three bytes stay consumed and the value 8
 * is ORed into UTFMODE (presumably the marker for "activated during
 * BOM skipping" -- confirm against the UTFMODE definition).
 *
 * On a mismatch, every character read so far is handed back through
 * ungetsc_(): the mismatching character first, then the BOM bytes
 * that had already matched, most recently read first.
 */
void
yyskiputf8bom(void)
{
	static const unsigned char bom[3] = { 0xEF, 0xBB, 0xBF };
	int seen, ch;

	for (seen = 0; seen < 3; ++seen) {
		ch = _getsc_();
		if ((unsigned char)ch == bom[seen])
			continue;
		/* not a BOM: give back the character that broke the match */
		ungetsc_(ch);
		/* then the bytes matched before it, in reverse read order */
		while (seen > 0) {
			--seen;
			ungetsc_((int)bom[seen]);
		}
		return;
	}
	UTFMODE |= 8;
}
static Lex_state * static Lex_state *
push_state_(State_info *si, Lex_state *old_end) push_state_(State_info *si, Lex_state *old_end)
{ {

9
main.c
View File

@ -33,7 +33,7 @@
#include <locale.h> #include <locale.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.180 2011/03/13 01:20:21 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/main.c,v 1.181 2011/03/13 16:03:52 tg Exp $");
extern char **environ; extern char **environ;
@ -663,12 +663,10 @@ shell(Source * volatile s, volatile int toplevel)
volatile int wastty = s->flags & SF_TTY; volatile int wastty = s->flags & SF_TTY;
volatile int attempts = 13; volatile int attempts = 13;
volatile int interactive = Flag(FTALKING) && toplevel; volatile int interactive = Flag(FTALKING) && toplevel;
volatile bool sfirst = true;
Source *volatile old_source = source; Source *volatile old_source = source;
int i; int i;
/* enable UTF-8 BOM check */
s->flags |= SF_FIRST;
newenv(E_PARSE); newenv(E_PARSE);
if (interactive) if (interactive)
really_exit = 0; really_exit = 0;
@ -730,7 +728,8 @@ shell(Source * volatile s, volatile int toplevel)
j_notify(); j_notify();
set_prompt(PS1, s); set_prompt(PS1, s);
} }
t = compile(s); t = compile(s, sfirst);
sfirst = false;
if (t != NULL && t->type == TEOF) { if (t != NULL && t->type == TEOF) {
if (wastty && Flag(FIGNOREEOF) && --attempts > 0) { if (wastty && Flag(FIGNOREEOF) && --attempts > 0) {
shellf("Use 'exit' to leave mksh\n"); shellf("Use 'exit' to leave mksh\n");

9
sh.h
View File

@ -154,9 +154,9 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.444 2011/03/13 01:20:22 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.445 2011/03/13 16:03:53 tg Exp $");
#endif #endif
#define MKSH_VERSION "R39 2011/03/12" #define MKSH_VERSION "R39 2011/03/13"
#ifndef MKSH_INCLUDES_ONLY #ifndef MKSH_INCLUDES_ONLY
@ -1353,8 +1353,7 @@ struct source {
#define SF_ALIAS BIT(1) /* faking space at end of alias */ #define SF_ALIAS BIT(1) /* faking space at end of alias */
#define SF_ALIASEND BIT(2) /* faking space at end of alias */ #define SF_ALIASEND BIT(2) /* faking space at end of alias */
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */ #define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */ #define SF_HASALIAS BIT(4) /* u.tblp valid (SALIAS, SEOF) */
#define SF_HASALIAS BIT(5) /* u.tblp valid (SALIAS, SEOF) */
typedef union { typedef union {
int i; int i;
@ -1741,7 +1740,7 @@ int shf_vfprintf(struct shf *, const char *, va_list)
MKSH_A_FORMAT(printf, 2, 0); MKSH_A_FORMAT(printf, 2, 0);
/* syn.c */ /* syn.c */
void initkeywords(void); void initkeywords(void);
struct op *compile(Source *); struct op *compile(Source *, bool);
bool parse_usec(const char *, struct timeval *); bool parse_usec(const char *, struct timeval *);
char *yyrecursive(void); char *yyrecursive(void);
/* tree.c */ /* tree.c */

8
syn.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.56 2011/03/13 01:20:24 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.57 2011/03/13 16:03:54 tg Exp $");
struct nesting_state { struct nesting_state {
int start_token; /* token than began nesting (eg, FOR) */ int start_token; /* token than began nesting (eg, FOR) */
@ -875,12 +875,16 @@ newtp(int type)
} }
struct op * struct op *
compile(Source *s) compile(Source *s, bool skiputf8bom)
{ {
extern void yyskiputf8bom(void);
nesting.start_token = 0; nesting.start_token = 0;
nesting.start_line = 0; nesting.start_line = 0;
herep = heres; herep = heres;
source = s; source = s;
if (skiputf8bom)
yyskiputf8bom();
yyparse(); yyparse();
return (outtree); return (outtree);
} }