• speed optimisation: drop the SF_FIRST flag and factor out skipping the
UTF-8 BOM into a separate function instead (UTFMODE now has a separate value meaning "activated during BOM skipping")
• parsing a COMSUB now skips the UTF-8 BOM, too, but enables UTF-8 mode only temporarily
This commit is contained in:
parent
e66eaab861
commit
af53a7d16a
11
check.t
11
check.t
@ -1,4 +1,4 @@
|
|||||||
# $MirOS: src/bin/mksh/check.t,v 1.424 2011/03/13 15:57:21 tg Exp $
|
# $MirOS: src/bin/mksh/check.t,v 1.425 2011/03/13 16:03:49 tg Exp $
|
||||||
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
||||||
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
||||||
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
||||||
@ -25,7 +25,7 @@
|
|||||||
# http://www.research.att.com/~gsf/public/ifs.sh
|
# http://www.research.att.com/~gsf/public/ifs.sh
|
||||||
|
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
@(#)MIRBSD KSH R39 2011/03/12
|
@(#)MIRBSD KSH R39 2011/03/13
|
||||||
description:
|
description:
|
||||||
Check version of shell.
|
Check version of shell.
|
||||||
stdin:
|
stdin:
|
||||||
@ -5373,7 +5373,7 @@ expected-stdout:
|
|||||||
mit
|
mit
|
||||||
ohne
|
ohne
|
||||||
=
|
=
|
||||||
: mit
|
: ohne
|
||||||
---
|
---
|
||||||
name: utf8bom-2
|
name: utf8bom-2
|
||||||
description:
|
description:
|
||||||
@ -5404,12 +5404,17 @@ expected-stderr-pattern:
|
|||||||
name: utf8bom-3
|
name: utf8bom-3
|
||||||
description:
|
description:
|
||||||
Reading the UTF-8 BOM should enable the utf8-mode flag
|
Reading the UTF-8 BOM should enable the utf8-mode flag
|
||||||
|
(temporarily for COMSUBs)
|
||||||
stdin:
|
stdin:
|
||||||
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi'
|
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 1 on; else echo 1 off; fi'
|
||||||
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi'
|
"$__progname" -c ':; if [[ $- = *U* ]]; then echo 2 on; else echo 2 off; fi'
|
||||||
|
"$__progname" -c 'if [[ $- = *U* ]]; then echo 3 on; else echo 3 off; fi; x=$(:; if [[ $- = *U* ]]; then echo 4 on; else echo 4 off; fi); echo $x; if [[ $- = *U* ]]; then echo 5 on; else echo 5 off; fi'
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
1 off
|
1 off
|
||||||
2 on
|
2 on
|
||||||
|
3 off
|
||||||
|
4 on
|
||||||
|
5 off
|
||||||
---
|
---
|
||||||
name: utf8opt-1a
|
name: utf8opt-1a
|
||||||
description:
|
description:
|
||||||
|
6
eval.c
6
eval.c
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.99 2011/03/13 01:20:17 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.100 2011/03/13 16:03:50 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* string expansion
|
* string expansion
|
||||||
@ -1182,11 +1182,12 @@ comsub(Expand *xp, const char *cp)
|
|||||||
Source *s, *sold;
|
Source *s, *sold;
|
||||||
struct op *t;
|
struct op *t;
|
||||||
struct shf *shf;
|
struct shf *shf;
|
||||||
|
uint8_t old_utfmode = UTFMODE;
|
||||||
|
|
||||||
s = pushs(SSTRING, ATEMP);
|
s = pushs(SSTRING, ATEMP);
|
||||||
s->start = s->str = cp;
|
s->start = s->str = cp;
|
||||||
sold = source;
|
sold = source;
|
||||||
t = compile(s);
|
t = compile(s, true);
|
||||||
afree(s, ATEMP);
|
afree(s, ATEMP);
|
||||||
source = sold;
|
source = sold;
|
||||||
|
|
||||||
@ -1224,6 +1225,7 @@ comsub(Expand *xp, const char *cp)
|
|||||||
xp->split = 1;
|
xp->split = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
UTFMODE = old_utfmode;
|
||||||
xp->u.shf = shf;
|
xp->u.shf = shf;
|
||||||
return (XCOM);
|
return (XCOM);
|
||||||
}
|
}
|
||||||
|
46
lex.c
46
lex.c
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.138 2011/03/13 15:57:23 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.139 2011/03/13 16:03:51 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* states while lexing word
|
* states while lexing word
|
||||||
@ -103,6 +103,7 @@ static Lex_state *push_state_(State_info *, Lex_state *);
|
|||||||
static Lex_state *pop_state_(State_info *, Lex_state *);
|
static Lex_state *pop_state_(State_info *, Lex_state *);
|
||||||
|
|
||||||
static int dopprompt(const char *, int, bool);
|
static int dopprompt(const char *, int, bool);
|
||||||
|
void yyskiputf8bom(void);
|
||||||
|
|
||||||
static int backslash_skip;
|
static int backslash_skip;
|
||||||
static int ignore_backslash_newline;
|
static int ignore_backslash_newline;
|
||||||
@ -110,12 +111,10 @@ static struct sretrace_info *retrace_info = NULL;
|
|||||||
short comsub_nesting_level = 0;
|
short comsub_nesting_level = 0;
|
||||||
|
|
||||||
/* optimised getsc_bn() */
|
/* optimised getsc_bn() */
|
||||||
#define _getsc() (*source->str != '\0' && *source->str != '\\' \
|
#define _getsc() (*source->str != '\0' && *source->str != '\\' && \
|
||||||
&& !backslash_skip && !(source->flags & SF_FIRST) \
|
!backslash_skip ? *source->str++ : getsc_bn())
|
||||||
? *source->str++ : getsc_bn())
|
|
||||||
/* optimised getsc__() */
|
/* optimised getsc__() */
|
||||||
#define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \
|
#define _getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
|
||||||
? *source->str++ : getsc__())
|
|
||||||
|
|
||||||
/* retrace helper */
|
/* retrace helper */
|
||||||
#define _getsc_r(carg) { \
|
#define _getsc_r(carg) { \
|
||||||
@ -1293,7 +1292,6 @@ getsc__(void)
|
|||||||
Source *s = source;
|
Source *s = source;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
getsc_again:
|
|
||||||
while ((c = *s->str++) == 0) {
|
while ((c = *s->str++) == 0) {
|
||||||
/* return 0 for EOF by default */
|
/* return 0 for EOF by default */
|
||||||
s->str = NULL;
|
s->str = NULL;
|
||||||
@ -1390,17 +1388,6 @@ getsc__(void)
|
|||||||
shf_flush(shl_out);
|
shf_flush(shl_out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* check for UTF-8 byte order mark */
|
|
||||||
if (s->flags & SF_FIRST) {
|
|
||||||
s->flags &= ~SF_FIRST;
|
|
||||||
if (((unsigned char)c == 0xEF) &&
|
|
||||||
(((const unsigned char *)(s->str))[0] == 0xBB) &&
|
|
||||||
(((const unsigned char *)(s->str))[1] == 0xBF)) {
|
|
||||||
s->str += 2;
|
|
||||||
UTFMODE = 1;
|
|
||||||
goto getsc_again;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (c);
|
return (c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1799,6 +1786,29 @@ getsc_bn(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
yyskiputf8bom(void)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
|
||||||
|
if ((unsigned char)(c = _getsc_()) != 0xEF) {
|
||||||
|
ungetsc_(c);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ((unsigned char)(c = _getsc_()) != 0xBB) {
|
||||||
|
ungetsc_(c);
|
||||||
|
ungetsc_(0xEF);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ((unsigned char)(c = _getsc_()) != 0xBF) {
|
||||||
|
ungetsc_(c);
|
||||||
|
ungetsc_(0xBB);
|
||||||
|
ungetsc_(0xEF);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
UTFMODE |= 8;
|
||||||
|
}
|
||||||
|
|
||||||
static Lex_state *
|
static Lex_state *
|
||||||
push_state_(State_info *si, Lex_state *old_end)
|
push_state_(State_info *si, Lex_state *old_end)
|
||||||
{
|
{
|
||||||
|
9
main.c
9
main.c
@ -33,7 +33,7 @@
|
|||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.180 2011/03/13 01:20:21 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.181 2011/03/13 16:03:52 tg Exp $");
|
||||||
|
|
||||||
extern char **environ;
|
extern char **environ;
|
||||||
|
|
||||||
@ -663,12 +663,10 @@ shell(Source * volatile s, volatile int toplevel)
|
|||||||
volatile int wastty = s->flags & SF_TTY;
|
volatile int wastty = s->flags & SF_TTY;
|
||||||
volatile int attempts = 13;
|
volatile int attempts = 13;
|
||||||
volatile int interactive = Flag(FTALKING) && toplevel;
|
volatile int interactive = Flag(FTALKING) && toplevel;
|
||||||
|
volatile bool sfirst = true;
|
||||||
Source *volatile old_source = source;
|
Source *volatile old_source = source;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* enable UTF-8 BOM check */
|
|
||||||
s->flags |= SF_FIRST;
|
|
||||||
|
|
||||||
newenv(E_PARSE);
|
newenv(E_PARSE);
|
||||||
if (interactive)
|
if (interactive)
|
||||||
really_exit = 0;
|
really_exit = 0;
|
||||||
@ -730,7 +728,8 @@ shell(Source * volatile s, volatile int toplevel)
|
|||||||
j_notify();
|
j_notify();
|
||||||
set_prompt(PS1, s);
|
set_prompt(PS1, s);
|
||||||
}
|
}
|
||||||
t = compile(s);
|
t = compile(s, sfirst);
|
||||||
|
sfirst = false;
|
||||||
if (t != NULL && t->type == TEOF) {
|
if (t != NULL && t->type == TEOF) {
|
||||||
if (wastty && Flag(FIGNOREEOF) && --attempts > 0) {
|
if (wastty && Flag(FIGNOREEOF) && --attempts > 0) {
|
||||||
shellf("Use 'exit' to leave mksh\n");
|
shellf("Use 'exit' to leave mksh\n");
|
||||||
|
9
sh.h
9
sh.h
@ -154,9 +154,9 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EXTERN
|
#ifdef EXTERN
|
||||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.444 2011/03/13 01:20:22 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.445 2011/03/13 16:03:53 tg Exp $");
|
||||||
#endif
|
#endif
|
||||||
#define MKSH_VERSION "R39 2011/03/12"
|
#define MKSH_VERSION "R39 2011/03/13"
|
||||||
|
|
||||||
#ifndef MKSH_INCLUDES_ONLY
|
#ifndef MKSH_INCLUDES_ONLY
|
||||||
|
|
||||||
@ -1353,8 +1353,7 @@ struct source {
|
|||||||
#define SF_ALIAS BIT(1) /* faking space at end of alias */
|
#define SF_ALIAS BIT(1) /* faking space at end of alias */
|
||||||
#define SF_ALIASEND BIT(2) /* faking space at end of alias */
|
#define SF_ALIASEND BIT(2) /* faking space at end of alias */
|
||||||
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
|
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
|
||||||
#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */
|
#define SF_HASALIAS BIT(4) /* u.tblp valid (SALIAS, SEOF) */
|
||||||
#define SF_HASALIAS BIT(5) /* u.tblp valid (SALIAS, SEOF) */
|
|
||||||
|
|
||||||
typedef union {
|
typedef union {
|
||||||
int i;
|
int i;
|
||||||
@ -1741,7 +1740,7 @@ int shf_vfprintf(struct shf *, const char *, va_list)
|
|||||||
MKSH_A_FORMAT(printf, 2, 0);
|
MKSH_A_FORMAT(printf, 2, 0);
|
||||||
/* syn.c */
|
/* syn.c */
|
||||||
void initkeywords(void);
|
void initkeywords(void);
|
||||||
struct op *compile(Source *);
|
struct op *compile(Source *, bool);
|
||||||
bool parse_usec(const char *, struct timeval *);
|
bool parse_usec(const char *, struct timeval *);
|
||||||
char *yyrecursive(void);
|
char *yyrecursive(void);
|
||||||
/* tree.c */
|
/* tree.c */
|
||||||
|
8
syn.c
8
syn.c
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.56 2011/03/13 01:20:24 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.57 2011/03/13 16:03:54 tg Exp $");
|
||||||
|
|
||||||
struct nesting_state {
|
struct nesting_state {
|
||||||
int start_token; /* token that began nesting (eg, FOR) */
|
int start_token; /* token that began nesting (eg, FOR) */
|
||||||
@ -875,12 +875,16 @@ newtp(int type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct op *
|
struct op *
|
||||||
compile(Source *s)
|
compile(Source *s, bool skiputf8bom)
|
||||||
{
|
{
|
||||||
|
extern void yyskiputf8bom(void);
|
||||||
|
|
||||||
nesting.start_token = 0;
|
nesting.start_token = 0;
|
||||||
nesting.start_line = 0;
|
nesting.start_line = 0;
|
||||||
herep = heres;
|
herep = heres;
|
||||||
source = s;
|
source = s;
|
||||||
|
if (skiputf8bom)
|
||||||
|
yyskiputf8bom();
|
||||||
yyparse();
|
yyparse();
|
||||||
return (outtree);
|
return (outtree);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user