diff --git a/check.t b/check.t index 75468c8..5f50207 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.97 2007/04/15 12:09:56 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.98 2007/04/15 12:28:37 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -3950,3 +3950,14 @@ expected-stdout: a=BAR a=BAR --- +name: utf8bom-3 +description: + Reading the UTF-8 BOM should enable the utf8-hack flag +category: pdksh +stdin: + $0 -c ':; x=$(set +o); if [[ $x = *utf8* ]]; then print on; else print off; fi' + $0 -c '﻿:; x=$(set +o); if [[ $x = *utf8* ]]; then print on; else print off; fi' +expected-stdout: + off + on +--- diff --git a/lex.c b/lex.c index 40b75d3..45b8e09 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.27 2007/04/15 10:45:58 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.28 2007/04/15 12:28:38 tg Exp $"); /* Structure to keep track of the lexing state and the various pieces of info * needed for each particular state. */ @@ -957,6 +957,7 @@ getsc__(void) (((const unsigned char *)(s->str))[0] == 0xBB) && (((const unsigned char *)(s->str))[1] == 0xBF)) { s->str += 2; + Flag(FUTFHACK) = 1; goto getsc_again; } } diff --git a/mksh.1 b/mksh.1 index d12fc64..2d7fa80 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,7 +1,7 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.81 2007/03/10 21:14:07 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.82 2007/04/15 12:28:38 tg Exp $ .\" $OpenBSD: ksh.1,v 1.118 2006/11/30 08:47:58 jmc Exp $ .\" -.Dd March 10, 2007 +.Dd April 15, 2007 .Dt MKSH 1 .Os MirBSD .Sh NAME @@ -3269,7 +3269,8 @@ and optionally and at least one of these returns something that matches .Dq UTF-8 or -.Dq utf8 . +.Dq utf8 , +or if the input begins with a Byte Order Mark. 
.It Fl u \*(Ba Ic nounset Referencing of an unset parameter is treated as an error, unless one of the .Ql - ,