• speed optimisation: drop the SF_FIRST flag and factor out skipping the
UTF-8 BOM instead (UTFMODE now has a separate value for when it was activated during BOM skipping) • parsing a COMSUB now skips a UTF-8 BOM, too, but enables UTF-8 mode only temporarily
This commit is contained in:
parent
e66eaab861
commit
af53a7d16a
11
check.t
11
check.t
@ -1,4 +1,4 @@
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.424 2011/03/13 15:57:21 tg Exp $
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.425 2011/03/13 16:03:49 tg Exp $
|
||||
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
||||
@ -25,7 +25,7 @@
|
||||
# http://www.research.att.com/~gsf/public/ifs.sh
|
||||
|
||||
expected-stdout:
|
||||
@(#)MIRBSD KSH R39 2011/03/12
|
||||
@(#)MIRBSD KSH R39 2011/03/13
|
||||
description:
|
||||
Check version of shell.
|
||||
stdin:
|
||||
@ -5373,7 +5373,7 @@ expected-stdout:
|
||||
mit
|
||||
ohne
|
||||
=
|
||||
: mit
|
||||
: ohne
|
||||
---
|
||||
name: utf8bom-2
|
||||
description:
|
||||
@ -5404,12 +5404,17 @@ expected-stderr-pattern:
|
||||
name: utf8bom-3
|
||||
description:
|
||||
Reading the UTF-8 BOM should enable the utf8-mode flag
|
||||
(temporarily for COMSUBs)
|
||||
stdin:
|
||||
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi'
|
||||
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi'
|
||||
"$__progname" -c 'if [[ $- = *U* ]]; then echo 3 on; else echo 3 off; fi; x=$(:; if [[ $- = *U* ]]; then echo 4 on; else echo 4 off; fi); echo $x; if [[ $- = *U* ]]; then echo 5 on; else echo 5 off; fi'
|
||||
expected-stdout:
|
||||
1 off
|
||||
2 on
|
||||
3 off
|
||||
4 on
|
||||
5 off
|
||||
---
|
||||
name: utf8opt-1a
|
||||
description:
|
||||
|
6
eval.c
6
eval.c
@ -22,7 +22,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.99 2011/03/13 01:20:17 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.100 2011/03/13 16:03:50 tg Exp $");
|
||||
|
||||
/*
|
||||
* string expansion
|
||||
@ -1182,11 +1182,12 @@ comsub(Expand *xp, const char *cp)
|
||||
Source *s, *sold;
|
||||
struct op *t;
|
||||
struct shf *shf;
|
||||
uint8_t old_utfmode = UTFMODE;
|
||||
|
||||
s = pushs(SSTRING, ATEMP);
|
||||
s->start = s->str = cp;
|
||||
sold = source;
|
||||
t = compile(s);
|
||||
t = compile(s, true);
|
||||
afree(s, ATEMP);
|
||||
source = sold;
|
||||
|
||||
@ -1224,6 +1225,7 @@ comsub(Expand *xp, const char *cp)
|
||||
xp->split = 1;
|
||||
}
|
||||
|
||||
UTFMODE = old_utfmode;
|
||||
xp->u.shf = shf;
|
||||
return (XCOM);
|
||||
}
|
||||
|
46
lex.c
46
lex.c
@ -22,7 +22,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.138 2011/03/13 15:57:23 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.139 2011/03/13 16:03:51 tg Exp $");
|
||||
|
||||
/*
|
||||
* states while lexing word
|
||||
@ -103,6 +103,7 @@ static Lex_state *push_state_(State_info *, Lex_state *);
|
||||
static Lex_state *pop_state_(State_info *, Lex_state *);
|
||||
|
||||
static int dopprompt(const char *, int, bool);
|
||||
void yyskiputf8bom(void);
|
||||
|
||||
static int backslash_skip;
|
||||
static int ignore_backslash_newline;
|
||||
@ -110,12 +111,10 @@ static struct sretrace_info *retrace_info = NULL;
|
||||
short comsub_nesting_level = 0;
|
||||
|
||||
/* optimised getsc_bn() */
|
||||
#define _getsc() (*source->str != '\0' && *source->str != '\\' \
|
||||
&& !backslash_skip && !(source->flags & SF_FIRST) \
|
||||
? *source->str++ : getsc_bn())
|
||||
#define _getsc() (*source->str != '\0' && *source->str != '\\' && \
|
||||
!backslash_skip ? *source->str++ : getsc_bn())
|
||||
/* optimised getsc__() */
|
||||
#define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \
|
||||
? *source->str++ : getsc__())
|
||||
#define _getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
|
||||
|
||||
/* retrace helper */
|
||||
#define _getsc_r(carg) { \
|
||||
@ -1293,7 +1292,6 @@ getsc__(void)
|
||||
Source *s = source;
|
||||
int c;
|
||||
|
||||
getsc_again:
|
||||
while ((c = *s->str++) == 0) {
|
||||
/* return 0 for EOF by default */
|
||||
s->str = NULL;
|
||||
@ -1390,17 +1388,6 @@ getsc__(void)
|
||||
shf_flush(shl_out);
|
||||
}
|
||||
}
|
||||
/* check for UTF-8 byte order mark */
|
||||
if (s->flags & SF_FIRST) {
|
||||
s->flags &= ~SF_FIRST;
|
||||
if (((unsigned char)c == 0xEF) &&
|
||||
(((const unsigned char *)(s->str))[0] == 0xBB) &&
|
||||
(((const unsigned char *)(s->str))[1] == 0xBF)) {
|
||||
s->str += 2;
|
||||
UTFMODE = 1;
|
||||
goto getsc_again;
|
||||
}
|
||||
}
|
||||
return (c);
|
||||
}
|
||||
|
||||
@ -1799,6 +1786,29 @@ getsc_bn(void)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
yyskiputf8bom(void)
|
||||
{
|
||||
int c;
|
||||
|
||||
if ((unsigned char)(c = _getsc_()) != 0xEF) {
|
||||
ungetsc_(c);
|
||||
return;
|
||||
}
|
||||
if ((unsigned char)(c = _getsc_()) != 0xBB) {
|
||||
ungetsc_(c);
|
||||
ungetsc_(0xEF);
|
||||
return;
|
||||
}
|
||||
if ((unsigned char)(c = _getsc_()) != 0xBF) {
|
||||
ungetsc_(c);
|
||||
ungetsc_(0xBB);
|
||||
ungetsc_(0xEF);
|
||||
return;
|
||||
}
|
||||
UTFMODE |= 8;
|
||||
}
|
||||
|
||||
static Lex_state *
|
||||
push_state_(State_info *si, Lex_state *old_end)
|
||||
{
|
||||
|
9
main.c
9
main.c
@ -33,7 +33,7 @@
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.180 2011/03/13 01:20:21 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.181 2011/03/13 16:03:52 tg Exp $");
|
||||
|
||||
extern char **environ;
|
||||
|
||||
@ -663,12 +663,10 @@ shell(Source * volatile s, volatile int toplevel)
|
||||
volatile int wastty = s->flags & SF_TTY;
|
||||
volatile int attempts = 13;
|
||||
volatile int interactive = Flag(FTALKING) && toplevel;
|
||||
volatile bool sfirst = true;
|
||||
Source *volatile old_source = source;
|
||||
int i;
|
||||
|
||||
/* enable UTF-8 BOM check */
|
||||
s->flags |= SF_FIRST;
|
||||
|
||||
newenv(E_PARSE);
|
||||
if (interactive)
|
||||
really_exit = 0;
|
||||
@ -730,7 +728,8 @@ shell(Source * volatile s, volatile int toplevel)
|
||||
j_notify();
|
||||
set_prompt(PS1, s);
|
||||
}
|
||||
t = compile(s);
|
||||
t = compile(s, sfirst);
|
||||
sfirst = false;
|
||||
if (t != NULL && t->type == TEOF) {
|
||||
if (wastty && Flag(FIGNOREEOF) && --attempts > 0) {
|
||||
shellf("Use 'exit' to leave mksh\n");
|
||||
|
9
sh.h
9
sh.h
@ -154,9 +154,9 @@
|
||||
#endif
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.444 2011/03/13 01:20:22 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.445 2011/03/13 16:03:53 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R39 2011/03/12"
|
||||
#define MKSH_VERSION "R39 2011/03/13"
|
||||
|
||||
#ifndef MKSH_INCLUDES_ONLY
|
||||
|
||||
@ -1353,8 +1353,7 @@ struct source {
|
||||
#define SF_ALIAS BIT(1) /* faking space at end of alias */
|
||||
#define SF_ALIASEND BIT(2) /* faking space at end of alias */
|
||||
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
|
||||
#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */
|
||||
#define SF_HASALIAS BIT(5) /* u.tblp valid (SALIAS, SEOF) */
|
||||
#define SF_HASALIAS BIT(4) /* u.tblp valid (SALIAS, SEOF) */
|
||||
|
||||
typedef union {
|
||||
int i;
|
||||
@ -1741,7 +1740,7 @@ int shf_vfprintf(struct shf *, const char *, va_list)
|
||||
MKSH_A_FORMAT(printf, 2, 0);
|
||||
/* syn.c */
|
||||
void initkeywords(void);
|
||||
struct op *compile(Source *);
|
||||
struct op *compile(Source *, bool);
|
||||
bool parse_usec(const char *, struct timeval *);
|
||||
char *yyrecursive(void);
|
||||
/* tree.c */
|
||||
|
8
syn.c
8
syn.c
@ -22,7 +22,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.56 2011/03/13 01:20:24 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.57 2011/03/13 16:03:54 tg Exp $");
|
||||
|
||||
struct nesting_state {
|
||||
int start_token; /* token that began nesting (eg, FOR) */
|
||||
@ -875,12 +875,16 @@ newtp(int type)
|
||||
}
|
||||
|
||||
struct op *
|
||||
compile(Source *s)
|
||||
compile(Source *s, bool skiputf8bom)
|
||||
{
|
||||
extern void yyskiputf8bom(void);
|
||||
|
||||
nesting.start_token = 0;
|
||||
nesting.start_line = 0;
|
||||
herep = heres;
|
||||
source = s;
|
||||
if (skiputf8bom)
|
||||
yyskiputf8bom();
|
||||
yyparse();
|
||||
return (outtree);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user