if we read a UTF-8 BOM, enable the utf8-hack flag (we can test for that even on

MirBSD, because the enabling in main.c is only run for interactive shells)
This commit is contained in:
tg 2007-04-15 12:28:38 +00:00
parent 2785ce3de4
commit 680e8ebd3f
3 changed files with 18 additions and 5 deletions

13
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.97 2007/04/15 12:09:56 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.98 2007/04/15 12:28:37 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -3950,3 +3950,14 @@ expected-stdout:
a=BAR a=BAR
a=BAR a=BAR
--- ---
name: utf8bom-3
description:
Reading the UTF-8 BOM should enable the utf8-hack flag
category: pdksh
stdin:
$0 -c ':; x=$(set +o); if [[ $x = *utf8* ]]; then print on; else print off; fi'
$0 -c ':; x=$(set +o); if [[ $x = *utf8* ]]; then print on; else print off; fi'
expected-stdout:
off
on
---

3
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.27 2007/04/15 10:45:58 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.28 2007/04/15 12:28:38 tg Exp $");
/* Structure to keep track of the lexing state and the various pieces of info /* Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state. */ * needed for each particular state. */
@ -957,6 +957,7 @@ getsc__(void)
(((const unsigned char *)(s->str))[0] == 0xBB) && (((const unsigned char *)(s->str))[0] == 0xBB) &&
(((const unsigned char *)(s->str))[1] == 0xBF)) { (((const unsigned char *)(s->str))[1] == 0xBF)) {
s->str += 2; s->str += 2;
Flag(FUTFHACK) = 1;
goto getsc_again; goto getsc_again;
} }
} }

7
mksh.1
View File

@ -1,7 +1,7 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.81 2007/03/10 21:14:07 tg Exp $ .\" $MirOS: src/bin/mksh/mksh.1,v 1.82 2007/04/15 12:28:38 tg Exp $
.\" $OpenBSD: ksh.1,v 1.118 2006/11/30 08:47:58 jmc Exp $ .\" $OpenBSD: ksh.1,v 1.118 2006/11/30 08:47:58 jmc Exp $
.\" .\"
.Dd March 10, 2007 .Dd April 15, 2007
.Dt MKSH 1 .Dt MKSH 1
.Os MirBSD .Os MirBSD
.Sh NAME .Sh NAME
@ -3269,7 +3269,8 @@ and optionally
and at least one of these returns something that matches and at least one of these returns something that matches
.Dq UTF-8 .Dq UTF-8
or or
.Dq utf8 . .Dq utf8 ,
or if the input begins with a UTF-8 Byte Order Mark.
.It Fl u \*(Ba Ic nounset .It Fl u \*(Ba Ic nounset
Referencing of an unset parameter is treated as an error, unless one of the Referencing of an unset parameter is treated as an error, unless one of the
.Ql - , .Ql - ,