• more unsigned → unsigned int

• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
  at non-interactive startup, enabled at interactive startup, if the
  current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
  change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
  sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
  ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
  or do the same as print \x## or \u#### (depending on the utf8-hack flag),
  plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
  (print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
  base-0 numbers which I had planned to use for raw octets first, as they are
  used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
This commit is contained in:
tg 2008-04-19 22:15:06 +00:00
parent 4ff0ca0f86
commit 9b62cf15bf
14 changed files with 364 additions and 128 deletions

View File

@ -1,5 +1,5 @@
#!/bin/sh #!/bin/sh
srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.313 2008/04/02 16:55:05 tg Exp $' srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.314 2008/04/19 22:15:00 tg Exp $'
#- #-
# Environment used: CC CFLAGS CPPFLAGS LDFLAGS LIBS NOWARN NROFF TARGET_OS # Environment used: CC CFLAGS CPPFLAGS LDFLAGS LIBS NOWARN NROFF TARGET_OS
# CPPFLAGS recognised: MKSH_SMALL MKSH_ASSUME_UTF8 MKSH_NOPWNAM MKSH_NOVI # CPPFLAGS recognised: MKSH_SMALL MKSH_ASSUME_UTF8 MKSH_NOPWNAM MKSH_NOVI
@ -760,7 +760,7 @@ phase=x
NOWARN=$save_NOWARN # gcc runs with -Werror until here NOWARN=$save_NOWARN # gcc runs with -Werror until here
ac_test expstmt '' "if the compiler supports statements as expressions" <<-'EOF' ac_test expstmt '' "if the compiler supports statements as expressions" <<-'EOF'
#define ksh_isspace(c) ({ \ #define ksh_isspace(c) ({ \
unsigned ksh_isspace_c = (c); \ unsigned int ksh_isspace_c = (c); \
(ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \ (ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \
(ksh_isspace_c == 0x20); \ (ksh_isspace_c == 0x20); \
}) })

205
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.177 2008/04/19 22:03:18 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.178 2008/04/19 22:15:01 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh # http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R33 2008/04/16 @(#)MIRBSD KSH R33 2008/04/19
description: description:
Check version of shell. Check version of shell.
category: pdksh category: pdksh
@ -4373,6 +4373,35 @@ expected-stdout:
off off
on on
--- ---
name: utf8opt-1
description:
Check that the utf8-hack flag is not set at non-interactive startup
category: pdksh
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
print is set
else
print is not set
fi
expected-stdout:
is not set
---
name: utf8opt-2
description:
Check that the utf8-hack flag is set at interactive startup
category: pdksh
arguments: !-i!
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
print is set
else
print is not set
fi
expected-stdout:
is set
---
name: aliases-1 name: aliases-1
description: description:
Check if built-in shell aliases are okay Check if built-in shell aliases are okay
@ -4617,3 +4646,175 @@ expected-stdout:
bar bar
rab rab
--- ---
name: integer-base-one-1
description:
check if the use of fake integer base 1 works
stdin:
set -o utf8-hack
typeset -Uui16 i0=1#ï i1=1#€
typeset -i1 o0a=64
typeset -i1 o1a=0x263A
typeset -Uui1 o0b=0x7E
typeset -Uui1 o1b=0xFDD0
integer px=0xCAFE 'p0=1# ' p1=1#… pl=1#f
print "in <$i0> <$i1>"
print "out <${o0a#1#}|${o0b#1#}> <${o1a#1#}|${o1b#1#}>"
typeset -Uui1 i0 i1
print "pass <$px> <$p0> <$p1> <$pl> <${i0#1#}|${i1#1#}>"
typeset -Uui16 tv1=1#~ tv2=1# tv3=1#€ tv4=1#<23> tv5=1#À tv6=1#Á tv7=1#  tv8=1#€
print "specX <${tv1#16#}> <${tv2#16#}> <${tv3#16#}> <${tv4#16#}> <${tv5#16#}> <${tv6#16#}> <${tv7#16#}> <${tv8#16#}>"
typeset -i1 tv1 tv2 tv3 tv4 tv5 tv6 tv7 tv8
print "specW <${tv1#1#}> <${tv2#1#}> <${tv3#1#}> <${tv4#1#}> <${tv5#1#}> <${tv6#1#}> <${tv7#1#}> <${tv8#1#}>"
typeset -i1 xs1=0xEF7F xs2=0xEF80 xs3=0xFDD0
print "specU <${xs1#1#}> <${xs2#1#}> <${xs3#1#}>"
expected-stdout:
in <16#EFEF> <16#20AC>
out <@|~> <☺|ï·<C3AF>>
pass <16#cafe> <1# > <1#…> <1#f> <ï|€>
specX <7E> <7F> <EF80> <EF81> <EFC0> <EFC1> <A0> <80>
specW <~> <> <€> <<3C>> <À> <Á> < > <€>
specU <> <€> <ï·<C3AF>>
---
name: integer-base-one-2a
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#foo
print /$x/
expected-stderr-pattern:
/1#foo: unexpected 'oo'/
expected-exit: e != 0
---
name: integer-base-one-2b
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#À€
print /$x/
expected-stderr-pattern:
/1#À€: unexpected '€'/
expected-exit: e != 0
---
name: integer-base-one-2c1
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#…
print /$x/
expected-stdout:
/1#…/
---
name: integer-base-one-2c2
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set +o utf8-hack
integer x=1#…
print /$x/
expected-stderr-pattern:
/1#…: unexpected '€'/
expected-exit: e != 0
---
name: integer-base-one-3a
description:
some sample code for hexdumping
stdin:
print 'Hello, World!\\\nこんにちは！' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
hv=1#${line::1}
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
print -n "${hv#16#} "
if (( (hv < 32) || (hv > 126) )); then
dasc=$dasc.
else
dasc=$dasc${line::1}
fi
(( (pos++ & 15) == 7 )) && print -n -- '- '
line=${line:1}
done
done
if (( (pos & 15) != 1 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -n -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 48 65 6C 6C 6F 2C 20 57 - 6F 72 6C 64 21 5C 0A E3 |Hello, World!\..|
00000010 81 93 E3 82 93 E3 81 AB - E3 81 A1 E3 81 AF EF BC |................|
00000020 81 0A - |..|
---
name: integer-base-one-3b
description:
some sample code for hexdumping Unicode
as of now, doesn't work because illicit assignments break
expected-fail: yes
stdin:
set -o utf8-hack
print 'Hello, World!\\\nこんにちは！' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
integer n
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
if (( ${#line} > 2 )) && wc=1#${line::3}; then
n=3
elif (( ${#line} > 1 )) && wc=1#${line::2}; then
n=2
else
wc=1#${line::1}
n=3
fi
if (( (wc < 32) || \
((wc > 126) && (wc < 160)) )); then
dasc=$dasc.
elif (( wc < 0x0800 )); then
dasc=$dasc${wc#1#}
fi
while (( n-- )); do
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
hv=1#${line::1}
print -n "${hv#16#} "
(( (pos++ & 15) == 7 )) && print -- '- '
line=${line:1}
done
(( wc >= 0x0800 )) && dasc=$dasc${wc#1#}
done
done
if (( pos & 15 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 48 65 6C 6C 6F 2C 20 57 - 6F 72 6C 64 21 5C 0A E3 |Hello, World!\.|
00000010 81 93 E3 82 93 E3 81 AB - E3 81 A1 E3 81 AF EF BC |こんにちは|
00000020 81 0A - |！.|
---

93
edit.c
View File

@ -5,7 +5,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.121 2008/04/19 17:25:49 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/edit.c,v 1.122 2008/04/19 22:15:01 tg Exp $");
/* tty driver characters we are interested in */ /* tty driver characters we are interested in */
typedef struct { typedef struct {
@ -58,9 +58,6 @@ static int x_file_glob(int, const char *, int, char ***);
static int x_command_glob(int, const char *, int, char ***); static int x_command_glob(int, const char *, int, char ***);
static int x_locate_word(const char *, int, int, int *, bool *); static int x_locate_word(const char *, int, int, int *, bool *);
static size_t mbxtowc(unsigned *, const char *);
static size_t wcxtomb(char *, unsigned);
static int wcxwidth(unsigned);
static int x_e_getmbc(char *); static int x_e_getmbc(char *);
static char *utf_getcpfromcols(char *, int); static char *utf_getcpfromcols(char *, int);
@ -770,24 +767,21 @@ int
utf_widthadj(const char *src, const char **dst) utf_widthadj(const char *src, const char **dst)
{ {
size_t len; size_t len;
unsigned wc; unsigned int wc;
int width; int width;
if (!Flag(FUTFHACK) || *(const unsigned char *)src <= 0x7F || if (!Flag(FUTFHACK) || (len = utf_mbtowc(&wc, src)) == (size_t)-1)
(len = mbxtowc(&wc, src)) == (size_t)-1) { len = width = 1;
if (dst) else
*dst = src + 1; width = utf_wcwidth(wc);
return (1);
}
if (dst) if (dst)
*dst = src + len; *dst = src + len;
width = wcxwidth(wc);
return (width == -1 ? 2 : width); return (width == -1 ? 2 : width);
} }
int int
ksh_mbswidth(const char *s) utf_mbswidth(const char *s)
{ {
size_t len; size_t len;
unsigned int wc; unsigned int wc;
@ -797,8 +791,8 @@ ksh_mbswidth(const char *s)
return (strlen(s)); return (strlen(s));
while (*s) while (*s)
if (((len = mbxtowc(&wc, s)) == (size_t)-1) || if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
((cw = wcxwidth(wc)) == -1)) { ((cw = utf_wcwidth(wc)) == -1)) {
s++; s++;
width += 1; width += 1;
} else { } else {
@ -870,8 +864,8 @@ utf_getcpfromcols(char *p, int cols)
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.4 2006/11/01 20:01:20 tg Exp $"); __RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.4 2006/11/01 20:01:20 tg Exp $");
static int int
wcxwidth(unsigned c) utf_wcwidth(unsigned int c)
{ {
static const struct cbset { static const struct cbset {
unsigned short first; unsigned short first;
@ -953,8 +947,8 @@ wcxwidth(unsigned c)
/* --- begin of mbrtowc.c excerpt --- */ /* --- begin of mbrtowc.c excerpt --- */
__RCSID("$miros: src/lib/libc/i18n/mbrtowc.c,v 1.13 2006/11/01 20:01:19 tg Exp $"); __RCSID("$miros: src/lib/libc/i18n/mbrtowc.c,v 1.13 2006/11/01 20:01:19 tg Exp $");
static size_t size_t
mbxtowc(unsigned *dst, const char *src) utf_mbtowc(unsigned int *dst, const char *src)
{ {
const unsigned char *s = (const unsigned char *)src; const unsigned char *s = (const unsigned char *)src;
unsigned int c, wc, count; unsigned int c, wc, count;
@ -1002,29 +996,28 @@ mbxtowc(unsigned *dst, const char *src)
/* --- begin of wcrtomb.c excerpt --- */ /* --- begin of wcrtomb.c excerpt --- */
__RCSID("$miros: src/lib/libc/i18n/wcrtomb.c,v 1.14 2006/11/01 20:12:44 tg Exp $"); __RCSID("$miros: src/lib/libc/i18n/wcrtomb.c,v 1.14 2006/11/01 20:12:44 tg Exp $");
static size_t size_t
wcxtomb(char *src, unsigned wc) utf_wctomb(char *dst, unsigned int wc)
{ {
unsigned char *s = (unsigned char *)src; unsigned char count, *d = (unsigned char *)dst;
unsigned int count;
if (wc > 0xFFFD) if (wc > 0xFFFD)
wc = 0xFFFD; wc = 0xFFFD;
if (wc < 0x80) { if (wc < 0x80) {
count = 0; count = 0;
*s++ = wc; *d++ = wc;
} else if (wc < 0x0800) { } else if (wc < 0x0800) {
count = 1; count = 1;
*s++ = (wc >> 6) | 0xC0; *d++ = (wc >> 6) | 0xC0;
} else { } else {
count = 2; count = 2;
*s++ = (wc >> 12) | 0xE0; *d++ = (wc >> 12) | 0xE0;
} }
while (count) { while (count) {
*s++ = ((wc >> (6 * --count)) & 0x3F) | 0x80; *d++ = ((wc >> (6 * --count)) & 0x3F) | 0x80;
} }
return ((char *)s - src); return ((char *)d - dst);
} }
/* --- end of wcrtomb.c excerpt --- */ /* --- end of wcrtomb.c excerpt --- */
@ -1435,17 +1428,22 @@ x_e_getmbc(char *sbuf)
return (-1); return (-1);
if (Flag(FUTFHACK)) { if (Flag(FUTFHACK)) {
if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) { if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
buf[pos++] = c = x_e_getc(); c = x_e_getc();
if (c == -1) if (c == -1)
return (-1); return (-1);
if ((c & 0xC0) != 0x80) {
x_e_ungetc(c);
return (1);
}
buf[pos++] = c;
} }
if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) { if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
/* XXX x_e_ungetc is one-octet only */
buf[pos++] = c = x_e_getc(); buf[pos++] = c = x_e_getc();
if (c == -1) if (c == -1)
return (-1); return (-1);
} }
} }
buf[pos] = '\0';
return (pos); return (pos);
} }
@ -1910,17 +1908,24 @@ x_zotc2(int c)
static void static void
x_zotc3(char **cp) x_zotc3(char **cp)
{ {
unsigned c = **(unsigned char **)cp; unsigned char c = **(unsigned char **)cp;
if (c == 0xC2 && Flag(FUTFHACK)) {
unsigned char c2 = ((unsigned char *)*cp)[1];
if (c2 >= 0x80 && c2 < 0xA0) {
c = c2;
(*cp)++;
}
}
if (c == '\t') { if (c == '\t') {
/* Kludge, tabs are always four spaces. */ /* Kludge, tabs are always four spaces. */
x_e_puts(" "); x_e_puts(" ");
(*cp)++; (*cp)++;
} else if (c < ' ' || c == 0x7f || (Flag(FUTFHACK) && c == 0xC2 && } else if (c < ' ' || (c >= 0x7F && c < 0xA0)) {
((unsigned char *)*cp)[1] < 0xA0 && mbxtowc(&c, *cp))) {
x_e_putc2('^'); x_e_putc2('^');
x_e_putc2(UNCTRL(c)); x_e_putc2(UNCTRL(c));
*cp += c & 0x80 ? 2 : 1; (*cp)++;
} else } else
x_e_putc3((const char **)cp); x_e_putc3((const char **)cp);
} }
@ -2372,35 +2377,35 @@ x_transpose(int c __unused)
* cursor, do not change cursor position * cursor, do not change cursor position
*/ */
x_bs2(xcp = utf_backch(xcp)); x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpa, xcp) == (size_t)-1) { if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
x_e_putc2(7); x_e_putc2(7);
return KSTD; return KSTD;
} }
x_bs2(xcp = utf_backch(xcp)); x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpb, xcp) == (size_t)-1) { if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
x_e_putc2(7); x_e_putc2(7);
return KSTD; return KSTD;
} }
wcxtomb(xcp, tmpa); utf_wctomb(xcp, tmpa);
x_zotc3(&xcp); x_zotc3(&xcp);
wcxtomb(xcp, tmpb); utf_wctomb(xcp, tmpb);
x_zotc3(&xcp); x_zotc3(&xcp);
} else { } else {
/* GNU emacs style: Swap the characters before and under the /* GNU emacs style: Swap the characters before and under the
* cursor, move cursor position along one. * cursor, move cursor position along one.
*/ */
if (mbxtowc(&tmpa, xcp) == (size_t)-1) { if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
x_e_putc2(7); x_e_putc2(7);
return KSTD; return KSTD;
} }
x_bs2(xcp = utf_backch(xcp)); x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpb, xcp) == (size_t)-1) { if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
x_e_putc2(7); x_e_putc2(7);
return KSTD; return KSTD;
} }
wcxtomb(xcp, tmpa); utf_wctomb(xcp, tmpa);
x_zotc3(&xcp); x_zotc3(&xcp);
wcxtomb(xcp, tmpb); utf_wctomb(xcp, tmpb);
x_zotc3(&xcp); x_zotc3(&xcp);
} }
return KSTD; return KSTD;
@ -2957,13 +2962,13 @@ x_e_putc2(int c)
if (c < 0xA0) if (c < 0xA0)
c = 0xFFFD; c = 0xFFFD;
x = wcxtomb(utf_tmp, c); x = utf_wctomb(utf_tmp, c);
x_putc(utf_tmp[0]); x_putc(utf_tmp[0]);
if (x > 1) if (x > 1)
x_putc(utf_tmp[1]); x_putc(utf_tmp[1]);
if (x > 2) if (x > 2)
x_putc(utf_tmp[2]); x_putc(utf_tmp[2]);
width = wcxwidth(c); width = utf_wcwidth(c);
} else } else
x_putc(c); x_putc(c);
switch (c) { switch (c) {

6
exec.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.42 2008/04/19 17:21:53 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/exec.c,v 1.43 2008/04/19 22:15:02 tg Exp $");
static int comexec(struct op *, struct tbl *volatile, const char **, static int comexec(struct op *, struct tbl *volatile, const char **,
int volatile); int volatile);
@ -1349,7 +1349,7 @@ pr_menu(const char *const *ap)
* get dimensions of the list * get dimensions of the list
*/ */
for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) { for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) {
i = ksh_mbswidth(*pp); i = utf_mbswidth(*pp);
nwidth = (i > nwidth) ? i : nwidth; nwidth = (i > nwidth) ? i : nwidth;
} }
/* /*
@ -1388,7 +1388,7 @@ pr_list(char *const *ap)
int nwidth, i, n; int nwidth, i, n;
for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) { for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) {
i = ksh_mbswidth(*pp); i = utf_mbswidth(*pp);
nwidth = (i > nwidth) ? i : nwidth; nwidth = (i > nwidth) ? i : nwidth;
} }
print_columns(shl_out, n, plain_fmt_entry, (const void *)ap, print_columns(shl_out, n, plain_fmt_entry, (const void *)ap,

11
expr.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.14 2008/03/28 13:33:37 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.15 2008/04/19 22:15:02 tg Exp $");
/* The order of these enums is constrained by the order of opinfo[] */ /* The order of these enums is constrained by the order of opinfo[] */
enum token { enum token {
@ -475,10 +475,15 @@ exprtoken(Expr_state *es)
afree(tvar, ATEMP); afree(tvar, ATEMP);
} }
es->tok = VAR; es->tok = VAR;
} else if (c == '1' && cp[1] == '#') {
utf_cptradj(cp + 2, &cp);
tvar = str_nsave(es->tokp, cp - es->tokp, ATEMP);
goto process_tvar;
} else if (ksh_isdigit(c)) { } else if (ksh_isdigit(c)) {
for (; c != '_' && (ksh_isalnux(c) || c == '#'); c = *cp++) while (c != '_' && (ksh_isalnux(c) || c == '#'))
; c = *cp++;
tvar = str_nsave(es->tokp, --cp - es->tokp, ATEMP); tvar = str_nsave(es->tokp, --cp - es->tokp, ATEMP);
process_tvar:
es->val = tempvar(); es->val = tempvar();
es->val->flag &= ~INTEGER; es->val->flag &= ~INTEGER;
es->val->type = 0; es->val->type = 0;

View File

@ -5,7 +5,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.76 2008/04/16 21:56:01 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.77 2008/04/19 22:15:03 tg Exp $");
/* A leading = means assignments before command are kept; /* A leading = means assignments before command are kept;
* a leading * means a POSIX special builtin; * a leading * means a POSIX special builtin;
@ -1591,7 +1591,7 @@ c_umask(const char **wp)
p[-1] = '\0'; p[-1] = '\0';
shprintf("%s\n", buf); shprintf("%s\n", buf);
} else } else
shprintf("%#3.3o\n", (unsigned) old_umask); shprintf("%#3.3o\n", (unsigned int)old_umask);
} else { } else {
mode_t new_umask; mode_t new_umask;

4
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.57 2008/03/28 13:46:53 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.58 2008/04/19 22:15:03 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@ -1310,7 +1310,7 @@ dopprompt(const char *cp, int ntruncate, int doprint)
columns--; columns--;
} else if (*cp == delimiter) } else if (*cp == delimiter)
indelimit = !indelimit; indelimit = !indelimit;
else if (Flag(FUTFHACK) && ((unsigned)*cp > 0x7F)) { else if (Flag(FUTFHACK) && ((unsigned char)*cp > 0x7F)) {
const char *cp2; const char *cp2;
columns += utf_widthadj(cp, &cp2); columns += utf_widthadj(cp, &cp2);
if (doprint && (indelimit || if (doprint && (indelimit ||

12
misc.c
View File

@ -6,7 +6,7 @@
#include <grp.h> #include <grp.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.74 2008/04/19 17:21:54 tg Exp $\t" __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.75 2008/04/19 22:15:03 tg Exp $\t"
MKSH_SH_H_ID); MKSH_SH_H_ID);
#undef USE_CHVT #undef USE_CHVT
@ -35,7 +35,7 @@ static char *do_phys_path(XString *, char *, const char *);
void void
setctypes(const char *s, int t) setctypes(const char *s, int t)
{ {
unsigned i; unsigned int i;
if (t & C_IFS) { if (t & C_IFS) {
for (i = 0; i < UCHAR_MAX + 1; i++) for (i = 0; i < UCHAR_MAX + 1; i++)
@ -88,7 +88,7 @@ str_save(const char *s, Area *ap)
/* called from XcheckN() to grow buffer */ /* called from XcheckN() to grow buffer */
char * char *
Xcheck_grow_(XString *xsp, const char *xp, unsigned more) Xcheck_grow_(XString *xsp, const char *xp, unsigned int more)
{ {
const char *old_beg = xsp->beg; const char *old_beg = xsp->beg;
@ -185,7 +185,7 @@ options_fmt_entry(const void *arg, int i, char *buf, int buflen)
static void static void
printoptions(int verbose) printoptions(int verbose)
{ {
unsigned i; unsigned int i;
if (verbose) { if (verbose) {
struct options_info oi; struct options_info oi;
@ -216,7 +216,7 @@ printoptions(int verbose)
char * char *
getoptions(void) getoptions(void)
{ {
unsigned i; unsigned int i;
char m[(int) FNFLAGS + 1]; char m[(int) FNFLAGS + 1];
char *cp = m; char *cp = m;
@ -1446,7 +1446,7 @@ stristr(const char *b, const char *l)
#if !HAVE_EXPSTMT #if !HAVE_EXPSTMT
bool bool
ksh_isspace_(unsigned ksh_isspace_c) ksh_isspace_(unsigned int ksh_isspace_c)
{ {
return ((ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || return ((ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) ||
(ksh_isspace_c == 0x20)); (ksh_isspace_c == 0x20));

10
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.117 2008/04/19 21:18:52 tg Exp $ .\" $MirOS: src/bin/mksh/mksh.1,v 1.118 2008/04/19 22:15:04 tg Exp $
.\" $OpenBSD: ksh.1,v 1.121 2008/03/21 12:51:19 millert Exp $ .\" $OpenBSD: ksh.1,v 1.121 2008/03/21 12:51:19 millert Exp $
.\"- .\"-
.\" Try to make GNU groff and AT&T nroff more compatible .\" Try to make GNU groff and AT&T nroff more compatible
@ -2110,6 +2110,14 @@ extension, in all forms of arithmetic expressions,
except as numeric arguments to the except as numeric arguments to the
.Ic test .Ic test
command. command.
As a special
.Nm mksh
extension, numbers to the base of one are treated as either (8-bit
transparent) ASCII or Unicode codepoints, depending on the shell's
.Ic utf8\-hack
flag (current setting).
In Unicode mode, raw octets are mapped into the range EF80..EFFF,
which is in the PUA and has been assigned by CSUR for this use.
.Pp .Pp
The operators are evaluated as follows: The operators are evaluated as follows:
.Bl -tag -width Ds -offset indent .Bl -tag -width Ds -offset indent

View File

@ -1,5 +1,4 @@
/** $MirOS: src/bin/mksh/setmode.c,v 1.10 2008/04/06 23:27:19 tg Exp $ */ /** $MirOS: src/bin/mksh/setmode.c,v 1.11 2008/04/19 22:15:05 tg Exp $ */
/** $miros: src/lib/libc/gen/setmode.c,v 1.9 2007/10/25 15:13:39 tg Exp $ */
/* $OpenBSD: setmode.c,v 1.17 2005/08/08 08:05:34 espie Exp $ */ /* $OpenBSD: setmode.c,v 1.17 2005/08/08 08:05:34 espie Exp $ */
/* $NetBSD: setmode.c,v 1.15 1997/02/07 22:21:06 christos Exp $ */ /* $NetBSD: setmode.c,v 1.15 1997/02/07 22:21:06 christos Exp $ */
@ -58,23 +57,21 @@
#endif #endif
__SCCSID("@(#)setmode.c 8.2 (Berkeley) 3/25/94"); __SCCSID("@(#)setmode.c 8.2 (Berkeley) 3/25/94");
__RCSID("$MirOS: src/bin/mksh/setmode.c,v 1.10 2008/04/06 23:27:19 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/setmode.c,v 1.11 2008/04/19 22:15:05 tg Exp $");
__RCSID("$miros: src/lib/libc/gen/setmode.c,v 1.9 2007/10/25 15:13:39 tg Exp $"); __RCSID("$miros: src/lib/libc/gen/setmode.c,v 1.10 2008/04/19 16:27:23 tg Exp $");
/* for mksh */ /* for mksh */
#ifdef ksh_isdigit #ifdef ksh_isdigit
#undef isdigit #undef isdigit
#define isdigit ksh_isdigit #define isdigit ksh_isdigit
#endif #endif
/* for portability */
#ifndef S_ISTXT #ifndef S_ISTXT
#define S_ISTXT 0001000 #define S_ISTXT 0001000
#endif #endif
#define SET_LEN 6 /* initial # of bitcmd struct to malloc */
#define SET_LEN 6 /* initial # of bitcmd struct to malloc */ #define SET_LEN_INCR 4 /* # of bitcmd structs to add as needed */
#define SET_LEN_INCR 4 /* # of bitcmd structs to add as needed */
typedef struct bitcmd { typedef struct bitcmd {
char cmd; char cmd;
@ -88,7 +85,7 @@ typedef struct bitcmd {
#define CMD2_OBITS 0x08 #define CMD2_OBITS 0x08
#define CMD2_UBITS 0x10 #define CMD2_UBITS 0x10
static BITCMD *addcmd(BITCMD *, int, int, int, unsigned); static BITCMD *addcmd(BITCMD *, int, int, int, unsigned int);
static void compress_mode(BITCMD *); static void compress_mode(BITCMD *);
#ifdef SETMODE_DEBUG #ifdef SETMODE_DEBUG
static void dumpmode(BITCMD *); static void dumpmode(BITCMD *);
@ -364,7 +361,7 @@ setmode(const char *p)
} }
static BITCMD * static BITCMD *
addcmd(BITCMD *set, int op, int who, int oparg, unsigned mask) addcmd(BITCMD *set, int op, int who, int oparg, unsigned int mask)
{ {
switch (op) { switch (op) {
case '=': case '=':

17
sh.h
View File

@ -8,8 +8,8 @@
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.206 2008/04/19 17:21:54 tg Exp $" #define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.207 2008/04/19 22:15:05 tg Exp $"
#define MKSH_VERSION "R33 2008/04/16" #define MKSH_VERSION "R33 2008/04/19"
#if HAVE_SYS_PARAM_H #if HAVE_SYS_PARAM_H
#include <sys/param.h> #include <sys/param.h>
@ -156,12 +156,12 @@ typedef int bool;
#if HAVE_EXPSTMT #if HAVE_EXPSTMT
/* this macro must not evaluate its arguments several times */ /* this macro must not evaluate its arguments several times */
#define ksh_isspace(c) ({ \ #define ksh_isspace(c) ({ \
unsigned ksh_isspace_c = (c); \ unsigned int ksh_isspace_c = (c); \
(ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \ (ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \
(ksh_isspace_c == 0x20); \ (ksh_isspace_c == 0x20); \
}) })
#else #else
#define ksh_isspace(c) ksh_isspace_((unsigned)(c)) #define ksh_isspace(c) ksh_isspace_((unsigned int)(c))
#endif #endif
#ifndef S_ISLNK #ifndef S_ISLNK
@ -1054,7 +1054,7 @@ typedef char *XStringP;
#define Xsavepos(xs, xp) ((xp) - (xs).beg) #define Xsavepos(xs, xp) ((xp) - (xs).beg)
#define Xrestpos(xs, xp, n) ((xs).beg + (n)) #define Xrestpos(xs, xp, n) ((xs).beg + (n))
char *Xcheck_grow_(XString *, const char *, unsigned); char *Xcheck_grow_(XString *, const char *, unsigned int);
/* /*
* expandable vector of generic pointers * expandable vector of generic pointers
@ -1219,9 +1219,12 @@ void x_init(void);
int x_read(char *, size_t); int x_read(char *, size_t);
int x_bind(const char *, const char *, int, int); int x_bind(const char *, const char *, int, int);
/* UTF-8 hack stuff */ /* UTF-8 hack stuff */
size_t utf_mbtowc(unsigned int *, const char *);
size_t utf_wctomb(char *, unsigned int);
size_t utf_cptradj(const char *, const char **); size_t utf_cptradj(const char *, const char **);
int ksh_mbswidth(const char *);
int utf_widthadj(const char *, const char **); int utf_widthadj(const char *, const char **);
int utf_mbswidth(const char *);
int utf_wcwidth(unsigned int);
/* eval.c */ /* eval.c */
char *substitute(const char *, int); char *substitute(const char *, int);
char **eval(const char **, int); char **eval(const char **, int);
@ -1439,7 +1442,7 @@ void simplify_path(char *);
char *get_phys_path(const char *); char *get_phys_path(const char *);
void set_current_wd(char *); void set_current_wd(char *);
#if !HAVE_EXPSTMT #if !HAVE_EXPSTMT
bool ksh_isspace_(unsigned); bool ksh_isspace_(unsigned int);
#endif #endif
/* shf.c */ /* shf.c */
struct shf *shf_open(const char *, int, int, int); struct shf *shf_open(const char *, int, int, int);

38
shf.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.18 2008/04/19 17:21:55 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/shf.c,v 1.19 2008/04/19 22:15:05 tg Exp $");
/* flags to shf_emptybuf() */ /* flags to shf_emptybuf() */
#define EB_READSW 0x01 /* about to switch to reading */ #define EB_READSW 0x01 /* about to switch to reading */
@ -31,7 +31,7 @@ shf_open(const char *name, int oflags, int mode, int sflags)
/* Done before open so if alloca fails, fd won't be lost. */ /* Done before open so if alloca fails, fd won't be lost. */
shf = (struct shf *) alloc(sizeof(struct shf) + bsize, ATEMP); shf = (struct shf *) alloc(sizeof(struct shf) + bsize, ATEMP);
shf->areap = ATEMP; shf->areap = ATEMP;
shf->buf = (unsigned char *) &shf[1]; shf->buf = (unsigned char *)&shf[1];
shf->bsize = bsize; shf->bsize = bsize;
shf->flags = SHF_ALLOCS; shf->flags = SHF_ALLOCS;
/* Rest filled in by reopen. */ /* Rest filled in by reopen. */
@ -92,13 +92,13 @@ shf_fdopen(int fd, int sflags, struct shf *shf)
if (shf) { if (shf) {
if (bsize) { if (bsize) {
shf->buf = (unsigned char *) alloc(bsize, ATEMP); shf->buf = (unsigned char *)alloc(bsize, ATEMP);
sflags |= SHF_ALLOCB; sflags |= SHF_ALLOCB;
} else } else
shf->buf = NULL; shf->buf = NULL;
} else { } else {
shf = (struct shf *) alloc(sizeof(struct shf) + bsize, ATEMP); shf = (struct shf *)alloc(sizeof(struct shf) + bsize, ATEMP);
shf->buf = (unsigned char *) &shf[1]; shf->buf = (unsigned char *)&shf[1];
sflags |= SHF_ALLOCS; sflags |= SHF_ALLOCS;
} }
shf->areap = ATEMP; shf->areap = ATEMP;
@ -191,7 +191,7 @@ shf_sopen(char *buf, int bsize, int sflags, struct shf *shf)
buf = alloc(bsize, shf->areap); buf = alloc(bsize, shf->areap);
} }
shf->fd = -1; shf->fd = -1;
shf->buf = shf->rp = shf->wp = (unsigned char *) buf; shf->buf = shf->rp = shf->wp = (unsigned char *)buf;
shf->rnleft = bsize; shf->rnleft = bsize;
shf->rbsize = bsize; shf->rbsize = bsize;
shf->wnleft = bsize - 1; /* space for a '\0' */ shf->wnleft = bsize - 1; /* space for a '\0' */
@ -314,7 +314,7 @@ shf_emptybuf(struct shf *shf, int flags)
shf->flags &= ~SHF_READING; shf->flags &= ~SHF_READING;
} }
if (shf->flags & SHF_STRING) { if (shf->flags & SHF_STRING) {
unsigned char *nbuf; unsigned char *nbuf;
/* Note that we assume SHF_ALLOCS is not set if SHF_ALLOCB /* Note that we assume SHF_ALLOCS is not set if SHF_ALLOCB
* is set... (changing the shf pointer could cause problems) * is set... (changing the shf pointer could cause problems)
@ -323,7 +323,7 @@ shf_emptybuf(struct shf *shf, int flags)
!(shf->flags & SHF_ALLOCB)) !(shf->flags & SHF_ALLOCB))
return EOF; return EOF;
/* allocate more space for buffer */ /* allocate more space for buffer */
nbuf = (unsigned char *) aresize(shf->buf, shf->wbsize * 2, nbuf = (unsigned char *)aresize(shf->buf, shf->wbsize * 2,
shf->areap); shf->areap);
shf->rp = nbuf + (shf->rp - shf->buf); shf->rp = nbuf + (shf->rp - shf->buf);
shf->wp = nbuf + (shf->wp - shf->buf); shf->wp = nbuf + (shf->wp - shf->buf);
@ -477,7 +477,7 @@ shf_getse(char *buf, int bsize, struct shf *shf)
return buf == orig_buf ? NULL : buf; return buf == orig_buf ? NULL : buf;
} }
} }
end = (unsigned char *) memchr((char *) shf->rp, '\n', end = (unsigned char *)memchr((char *) shf->rp, '\n',
shf->rnleft); shf->rnleft);
ncopy = end ? end - shf->rp + 1 : shf->rnleft; ncopy = end ? end - shf->rp + 1 : shf->rnleft;
if (ncopy > bsize) if (ncopy > bsize)
@ -850,7 +850,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
else if ((sizeof (int) < sizeof (long)) && (c == 'd')) else if ((sizeof (int) < sizeof (long)) && (c == 'd'))
lnum = (long) va_arg(args, int); lnum = (long) va_arg(args, int);
else else
lnum = va_arg(args, unsigned); lnum = va_arg(args, unsigned int);
switch (c) { switch (c) {
case 'd': case 'd':
case 'i': case 'i':
@ -916,7 +916,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
case 's': case 's':
if (!(s = va_arg(args, const char *))) if (!(s = va_arg(args, const char *)))
s = "(null)"; s = "(null)";
len = ksh_mbswidth(s); len = utf_mbswidth(s);
break; break;
case 'c': case 'c':
@ -978,18 +978,14 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
field = 0; field = 0;
if (precision > 0) { if (precision > 0) {
const char *q = s;
nwritten += precision; nwritten += precision;
if (Flag(FUTFHACK)) { while (precision-- > 0)
const char *q = s; utf_cptradj(q, &q);
while (precision-- > 0) do {
utf_cptradj(q, &q);
do {
shf_putc(*s, shf);
} while (++s < q);
} else while (precision-- > 0) {
shf_putc(*s, shf); shf_putc(*s, shf);
s++; } while (++s < q);
}
} }
if (field > 0) { if (field > 0) {
nwritten += field; nwritten += field;

4
tree.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.15 2008/03/01 21:10:26 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/tree.c,v 1.16 2008/04/19 22:15:06 tg Exp $");
#define INDENT 4 #define INDENT 4
@ -375,7 +375,7 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
shf_fprintf(shf, "%d", va_arg(va, int)); shf_fprintf(shf, "%d", va_arg(va, int));
break; break;
case 'u': /* decimal */ case 'u': /* decimal */
shf_fprintf(shf, "%u", va_arg(va, unsigned)); shf_fprintf(shf, "%u", va_arg(va, unsigned int));
break; break;
case 'T': /* format tree */ case 'T': /* format tree */
ptree(va_arg(va, struct op *), indent, shf); ptree(va_arg(va, struct op *), indent, shf);

65
var.c
View File

@ -2,7 +2,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.53 2008/04/19 21:04:09 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/var.c,v 1.54 2008/04/19 22:15:06 tg Exp $");
/* /*
* Variables * Variables
@ -298,19 +298,31 @@ str_val(struct tbl *vp)
n = (vp->val.i < 0) ? -vp->val.i : vp->val.i; n = (vp->val.i < 0) ? -vp->val.i : vp->val.i;
base = (vp->type == 0) ? 10 : vp->type; base = (vp->type == 0) ? 10 : vp->type;
*--s = '\0'; if (base == 1) {
do { size_t sz = 1;
*--s = digits[n % base];
n /= base; *(s = strbuf) = '1';
} while (n != 0); s[1] = '#';
if (base != 10) { if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
*--s = '#'; s[2] = n & 0xFF;
*--s = digits[base % 10]; else
if (base >= 10) sz = utf_wctomb(s + 2, n);
*--s = digits[base / 10]; s[2 + sz] = '\0';
} else {
*--s = '\0';
do {
*--s = digits[n % base];
n /= base;
} while (n != 0);
if (base != 10) {
*--s = '#';
*--s = digits[base % 10];
if (base >= 10)
*--s = digits[base / 10];
}
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
} }
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
if (vp->flag & (RJUST|LJUST)) /* case already dealt with */ if (vp->flag & (RJUST|LJUST)) /* case already dealt with */
s = formatstr(vp, s); s = formatstr(vp, s);
else else
@ -401,9 +413,8 @@ int
getint(struct tbl *vp, long int *nump, bool arith) getint(struct tbl *vp, long int *nump, bool arith)
{ {
char *s; char *s;
int c; int c, base, neg;
int base, neg; bool have_base = false;
int have_base = 0;
long num; long num;
if (vp->flag&SPECIAL) if (vp->flag&SPECIAL)
@ -431,18 +442,28 @@ getint(struct tbl *vp, long int *nump, bool arith)
s++; s++;
} else } else
base = 8; base = 8;
have_base++; have_base = true;
} }
for (c = *s++; c ; c = *s++) { for (c = *s++; c ; c = *s++) {
if (c == '-') { if (c == '-') {
neg++; neg++;
continue; continue;
} else if (c == '#') { } else if (c == '#') {
base = (int) num; base = (int)num;
if (have_base || base < 2 || base > 36) if (have_base || base < 1 || base > 36)
return -1; return (-1);
if (base == 1) {
unsigned int wc;
if (!Flag(FUTFHACK))
wc = *(unsigned char *)s;
else if (utf_mbtowc(&wc, s) == (size_t)-1)
wc = 0xEF00 + *(unsigned char *)s;
*nump = (long)wc;
return (1);
}
num = 0; num = 0;
have_base = 1; have_base = true;
continue; continue;
} else if (ksh_isdigit(c)) } else if (ksh_isdigit(c))
c -= '0'; c -= '0';
@ -493,7 +514,7 @@ formatstr(struct tbl *vp, const char *s)
char *p, *q; char *p, *q;
size_t psiz; size_t psiz;
olen = ksh_mbswidth(s); olen = utf_mbswidth(s);
if (vp->flag & (RJUST|LJUST)) { if (vp->flag & (RJUST|LJUST)) {
if (!vp->u2.field) /* default field width */ if (!vp->u2.field) /* default field width */