live code from FOSDEM: add print \xDB and \u20AC, including regression test

agreed bsiegert@
good idea and manpage diff ok'd by some netbsd person sitting next to me
This commit is contained in:
tg 2008-02-24 15:48:43 +00:00
parent 1a5ea7052c
commit d16fc19335
4 changed files with 72 additions and 10 deletions

12
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.142 2007/10/25 13:51:18 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.143 2008/02/24 15:48:42 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R32 2007/10/25
@(#)MIRBSD KSH R33 2008/02/24
description:
Check version of shell.
category: pdksh
@ -4151,3 +4151,11 @@ stdin:
expected-stdout:
a defg a
---
name: print-funny-chars
description:
Check print builtin's capability to output designated characters
stdin:
print '<\0144\0344\xDB\u00DB\u20AC\uDB\x40>'
expected-stdout:
<däÛÃâ¬Ã@>
---

53
funcs.c
View File

@ -5,7 +5,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.67 2007/10/25 15:23:09 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.68 2008/02/24 15:48:42 tg Exp $");
/* A leading = means assignments before command are kept;
* a leading * means a POSIX special builtin;
@ -449,6 +449,57 @@ c_print(const char **wp)
break;
}
break;
case 'x':
/* Look for a hexadecimal number of
* up to 2 digits, write raw octet.
*/
c = 0;
for (i = 0; i < 2; i++) {
c <<= 4;
if (*s >= '0' && *s <= '9')
c += *s++ - '0';
else if (*s >= 'A' && *s <= 'F')
c += *s++ - 'A' + 10;
else if (*s >= 'a' && *s <= 'f')
c += *s++ - 'a' + 10;
else {
c >>= 4;
break;
}
}
break;
case 'u':
/* Look for a hexadecimal number of
* up to 4 digits, write Unicode.
*/
c = 0;
for (i = 0; i < 4; i++) {
c <<= 4;
if (*s >= '0' && *s <= '9')
c += *s++ - '0';
else if (*s >= 'A' && *s <= 'F')
c += *s++ - 'A' + 10;
else if (*s >= 'a' && *s <= 'f')
c += *s++ - 'a' + 10;
else {
c >>= 4;
break;
}
}
if (c < 0x80)
/* Xput below writes ASCII */;
else if (c < 0x0800) {
Xput(xs, xp, (c >> 6) | 0xC0);
c = 0x80 | (c & 0x3F);
/* leave 2nd octet to below */
} else {
Xput(xs, xp, (c >> 12) | 0xE0);
Xput(xs, xp,
((c >> 6) & 0x3F) | 0x80);
c = 0x80 | (c & 0x3F);
/* leave 3rd octet to below */
}
break;
case '\0': s--; c = '\\'; break;
case '\\': break;
default:

13
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.104 2007/10/25 13:51:18 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.105 2008/02/24 15:48:43 tg Exp $
.\" $OpenBSD: ksh.1,v 1.120 2007/05/31 20:47:44 otto Exp $
.\"
.Dd October 25, 2007
@ -3029,13 +3029,16 @@ These include
.Ql \en ,
.Ql \er ,
.Ql \et ,
.Ql \eu#### ,
.Ql \ev ,
.Ql \ex## ,
and
.Ql \e0###
.Po
.Ql \e0### ;
.Ql #
is an octal digit, of which there may be 0 to 3
.Pc .
is an octal digit in the case of \e0### (0 to 3 digits), or a
hexadecimal digit in the case of \ex## (0 to 2 digits) or \eu####
(0 to 4 digits).
The \ex## and \e0### escapes translate to raw 8-bit octets;
the \eu#### escape translates a Unicode codepoint to UTF-8.
.Ql \ec
is equivalent to using the
.Fl n

4
sh.h
View File

@ -8,8 +8,8 @@
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.184 2007/10/25 15:34:57 tg Exp $"
#define MKSH_VERSION "R32 2007/10/25"
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.185 2008/02/24 15:48:43 tg Exp $"
#define MKSH_VERSION "R33 2008/02/24"
#if HAVE_SYS_PARAM_H
#include <sys/param.h>