ignore the UTF-8 Byte Order Mark at the beginning of the input (via a file
given to execute, standard input (interactive or not), via the -c command line argument, or after “eval”, but not for $(…) comsubs, at the beginning of a subsequent line, or within a line, etc.); add a regression test for it. The idea came up during my “week off” (despite the pain); bsiegert@ thinks it's good – and UTF-8 capable tools ought to be able to do this anyway
This commit is contained in:
parent
67addd064f
commit
1692a6da66
46
check.t
46
check.t
@ -1,4 +1,4 @@
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.94 2007/03/14 02:41:08 tg Exp $
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.95 2007/04/15 10:45:58 tg Exp $
|
||||
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
||||
@ -3884,3 +3884,47 @@ stdin:
|
||||
expected-stdout:
|
||||
<0hall0 > < 0hall0> <hall0 > <00000hall0> <0000 hallo>
|
||||
---
|
||||
name: utf8bom-1
|
||||
description:
|
||||
Check that the UTF-8 Byte Order Mark is ignored as the first
|
||||
multibyte character of the shell input (with -c, from standard
|
||||
input, as file, or as eval argument), but nowhere else
|
||||
category: pdksh
|
||||
stdin:
|
||||
mkdir foo
|
||||
print '#!/bin/sh\necho ohne' >foo/fnord
|
||||
print '#!/bin/sh\necho mit' >foo/fnord
|
||||
print 'fnord\nfnord\nfnord\nfnord' >foo/bar
|
||||
print eval \''fnord\nfnord\nfnord\nfnord'\' >foo/zoo
|
||||
set -A anzahl -- foo/*
|
||||
print got ${#anzahl[*]} files
|
||||
chmod +x foo/*
|
||||
export PATH=$(pwd)/foo:$PATH
|
||||
$0 -c 'fnord'
|
||||
$0 -c 'fnord; fnord; fnord; fnord'
|
||||
$0 foo/bar
|
||||
$0 <foo/bar
|
||||
$0 foo/zoo
|
||||
$0 -c 'print : $(fnord)'
|
||||
rm -rf foo
|
||||
expected-stdout:
|
||||
got 4 files
|
||||
ohne
|
||||
ohne
|
||||
ohne
|
||||
mit
|
||||
ohne
|
||||
ohne
|
||||
ohne
|
||||
mit
|
||||
ohne
|
||||
ohne
|
||||
ohne
|
||||
mit
|
||||
ohne
|
||||
ohne
|
||||
ohne
|
||||
mit
|
||||
ohne
|
||||
: mit
|
||||
---
|
||||
|
19
lex.c
19
lex.c
@ -2,7 +2,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.26 2007/03/04 03:04:26 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.27 2007/04/15 10:45:58 tg Exp $");
|
||||
|
||||
/* Structure to keep track of the lexing state and the various pieces of info
|
||||
* needed for each particular state. */
|
||||
@ -64,9 +64,11 @@ static int ignore_backslash_newline;
|
||||
|
||||
/* optimised getsc_bn() */
|
||||
#define getsc() (*source->str != '\0' && *source->str != '\\' \
|
||||
&& !backslash_skip ? *source->str++ : getsc_bn())
|
||||
&& !backslash_skip && !(source->flags & SF_FIRST) \
|
||||
? *source->str++ : getsc_bn())
|
||||
/* optimised getsc__() */
|
||||
#define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
|
||||
#define getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \
|
||||
? *source->str++ : getsc__())
|
||||
|
||||
#define STATE_BSIZE 32
|
||||
|
||||
@ -856,6 +858,7 @@ getsc__(void)
|
||||
Source *s = source;
|
||||
int c;
|
||||
|
||||
getsc_again:
|
||||
while ((c = *s->str++) == 0) {
|
||||
s->str = NULL; /* return 0 for EOF by default */
|
||||
switch (s->type) {
|
||||
@ -947,6 +950,16 @@ getsc__(void)
|
||||
shf_flush(shl_out);
|
||||
}
|
||||
}
|
||||
/* check for UTF-8 byte order mark */
|
||||
if (s->flags & SF_FIRST) {
|
||||
s->flags &= ~SF_FIRST;
|
||||
if (((unsigned char)c == 0xEF) &&
|
||||
(((const unsigned char *)(s->str))[0] == 0xBB) &&
|
||||
(((const unsigned char *)(s->str))[1] == 0xBF)) {
|
||||
s->str += 2;
|
||||
goto getsc_again;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
4
main.c
4
main.c
@ -13,7 +13,7 @@
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.73 2007/03/04 03:04:26 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.74 2007/04/15 10:45:59 tg Exp $");
|
||||
|
||||
extern char **environ;
|
||||
|
||||
@ -457,6 +457,8 @@ shell(Source * volatile s, volatile int toplevel)
|
||||
Source *volatile old_source = source;
|
||||
int i;
|
||||
|
||||
s->flags |= SF_FIRST; /* enable UTF-8 BOM check */
|
||||
|
||||
newenv(E_PARSE);
|
||||
if (interactive)
|
||||
really_exit = 0;
|
||||
|
3
sh.h
3
sh.h
@ -8,7 +8,7 @@
|
||||
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
|
||||
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
|
||||
|
||||
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.121 2007/03/14 02:41:09 tg Exp $"
|
||||
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.122 2007/04/15 10:45:59 tg Exp $"
|
||||
#define MKSH_VERSION "R29 2007/03/14"
|
||||
|
||||
#if HAVE_SYS_PARAM_H
|
||||
@ -1088,6 +1088,7 @@ struct source {
|
||||
#define SF_ALIAS BIT(1) /* faking space at end of alias */
|
||||
#define SF_ALIASEND BIT(2) /* faking space at end of alias */
|
||||
#define SF_TTY BIT(3) /* type == SSTDIN & it is a tty */
|
||||
#define SF_FIRST BIT(4) /* initial state (to ignore UTF-8 BOM) */
|
||||
|
||||
/*
|
||||
* states while lexing word
|
||||
|
Loading…
x
Reference in New Issue
Block a user