• more unsigned → unsigned int

• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled
  at non-interactive startup, enabled at interactive startup, if the
  current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible,
  change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
  sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
  ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
  or do the same as print \x## or \u#### (depending on the utf8-hack flag),
  plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
  (print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
  base-0 numbers which I had planned to use for raw octets first, as they are
  used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
This commit is contained in:
tg 2008-04-19 22:15:06 +00:00
parent 4ff0ca0f86
commit 9b62cf15bf
14 changed files with 364 additions and 128 deletions

View File

@ -1,5 +1,5 @@
#!/bin/sh
srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.313 2008/04/02 16:55:05 tg Exp $'
srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.314 2008/04/19 22:15:00 tg Exp $'
#-
# Environment used: CC CFLAGS CPPFLAGS LDFLAGS LIBS NOWARN NROFF TARGET_OS
# CPPFLAGS recognised: MKSH_SMALL MKSH_ASSUME_UTF8 MKSH_NOPWNAM MKSH_NOVI
@ -760,7 +760,7 @@ phase=x
NOWARN=$save_NOWARN # gcc runs with -Werror until here
ac_test expstmt '' "if the compiler supports statements as expressions" <<-'EOF'
#define ksh_isspace(c) ({ \
unsigned ksh_isspace_c = (c); \
unsigned int ksh_isspace_c = (c); \
(ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \
(ksh_isspace_c == 0x20); \
})

205
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.177 2008/04/19 22:03:18 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.178 2008/04/19 22:15:01 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -7,7 +7,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R33 2008/04/16
@(#)MIRBSD KSH R33 2008/04/19
description:
Check version of shell.
category: pdksh
@ -4373,6 +4373,35 @@ expected-stdout:
off
on
---
name: utf8opt-1
description:
Check that the utf8-hack flag is not set at non-interactive startup
category: pdksh
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
print is set
else
print is not set
fi
expected-stdout:
is not set
---
name: utf8opt-2
description:
Check that the utf8-hack flag is set at interactive startup
category: pdksh
arguments: !-i!
env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
stdin:
if [[ $(set +o) = *@(-o utf8-hack)@(| *) ]]; then
print is set
else
print is not set
fi
expected-stdout:
is set
---
name: aliases-1
description:
Check if built-in shell aliases are okay
@ -4617,3 +4646,175 @@ expected-stdout:
bar
rab
---
name: integer-base-one-1
description:
check if the use of fake integer base 1 works
stdin:
set -o utf8-hack
typeset -Uui16 i0=1#ï i1=1#€
typeset -i1 o0a=64
typeset -i1 o1a=0x263A
typeset -Uui1 o0b=0x7E
typeset -Uui1 o1b=0xFDD0
integer px=0xCAFE 'p0=1# ' p1=1#… pl=1#f
print "in <$i0> <$i1>"
print "out <${o0a#1#}|${o0b#1#}> <${o1a#1#}|${o1b#1#}>"
typeset -Uui1 i0 i1
print "pass <$px> <$p0> <$p1> <$pl> <${i0#1#}|${i1#1#}>"
typeset -Uui16 tv1=1#~ tv2=1# tv3=1#€ tv4=1#<23> tv5=1#À tv6=1#Á tv7=1#  tv8=1#€
print "specX <${tv1#16#}> <${tv2#16#}> <${tv3#16#}> <${tv4#16#}> <${tv5#16#}> <${tv6#16#}> <${tv7#16#}> <${tv8#16#}>"
typeset -i1 tv1 tv2 tv3 tv4 tv5 tv6 tv7 tv8
print "specW <${tv1#1#}> <${tv2#1#}> <${tv3#1#}> <${tv4#1#}> <${tv5#1#}> <${tv6#1#}> <${tv7#1#}> <${tv8#1#}>"
typeset -i1 xs1=0xEF7F xs2=0xEF80 xs3=0xFDD0
print "specU <${xs1#1#}> <${xs2#1#}> <${xs3#1#}>"
expected-stdout:
in <16#EFEF> <16#20AC>
out <@|~> <☺|ï·<C3AF>>
pass <16#cafe> <1# > <1#…> <1#f> <ï|€>
specX <7E> <7F> <EF80> <EF81> <EFC0> <EFC1> <A0> <80>
specW <~> <> <€> <<3C>> <À> <Á> < > <€>
specU <> <€> <ï·<C3AF>>
---
name: integer-base-one-2a
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#foo
print /$x/
expected-stderr-pattern:
/1#foo: unexpected 'oo'/
expected-exit: e != 0
---
name: integer-base-one-2b
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#À€
print /$x/
expected-stderr-pattern:
/1#À€: unexpected '€'/
expected-exit: e != 0
---
name: integer-base-one-2c1
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set -o utf8-hack
integer x=1#…
print /$x/
expected-stdout:
/1#…/
---
name: integer-base-one-2c2
description:
check if the use of fake integer base 1 stops at correct characters
stdin:
set +o utf8-hack
integer x=1#…
print /$x/
expected-stderr-pattern:
/1#…: unexpected '€'/
expected-exit: e != 0
---
name: integer-base-one-3a
description:
some sample code for hexdumping
stdin:
print 'Hello, World!\\\nこんにちは！' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
hv=1#${line::1}
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
print -n "${hv#16#} "
if (( (hv < 32) || (hv > 126) )); then
dasc=$dasc.
else
dasc=$dasc${line::1}
fi
(( (pos++ & 15) == 7 )) && print -n -- '- '
line=${line:1}
done
done
if (( (pos & 15) != 1 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -n -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 48 65 6C 6C 6F 2C 20 57 - 6F 72 6C 64 21 5C 0A E3 |Hello, World!\..|
00000010 81 93 E3 82 93 E3 81 AB - E3 81 A1 E3 81 AF EF BC |................|
00000020 81 0A - |..|
---
name: integer-base-one-3b
description:
some sample code for hexdumping Unicode
as of now, doesn't work because illicit assignments break
expected-fail: yes
stdin:
set -o utf8-hack
print 'Hello, World!\\\nこんにちは！' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
integer n
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
if (( ${#line} > 2 )) && wc=1#${line::3}; then
n=3
elif (( ${#line} > 1 )) && wc=1#${line::2}; then
n=2
else
wc=1#${line::1}
n=3
fi
if (( (wc < 32) || \
((wc > 126) && (wc < 160)) )); then
dasc=$dasc.
elif (( wc < 0x0800 )); then
dasc=$dasc${wc#1#}
fi
while (( n-- )); do
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
hv=1#${line::1}
print -n "${hv#16#} "
(( (pos++ & 15) == 7 )) && print -- '- '
line=${line:1}
done
(( wc >= 0x0800 )) && dasc=$dasc${wc#1#}
done
done
if (( pos & 15 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 48 65 6C 6C 6F 2C 20 57 - 6F 72 6C 64 21 5C 0A E3 |Hello, World!\.|
00000010 81 93 E3 82 93 E3 81 AB - E3 81 A1 E3 81 AF EF BC |こんにちは|
00000020 81 0A - |！.|
---

93
edit.c
View File

@ -5,7 +5,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.121 2008/04/19 17:25:49 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.122 2008/04/19 22:15:01 tg Exp $");
/* tty driver characters we are interested in */
typedef struct {
@ -58,9 +58,6 @@ static int x_file_glob(int, const char *, int, char ***);
static int x_command_glob(int, const char *, int, char ***);
static int x_locate_word(const char *, int, int, int *, bool *);
static size_t mbxtowc(unsigned *, const char *);
static size_t wcxtomb(char *, unsigned);
static int wcxwidth(unsigned);
static int x_e_getmbc(char *);
static char *utf_getcpfromcols(char *, int);
@ -770,24 +767,21 @@ int
utf_widthadj(const char *src, const char **dst)
{
size_t len;
unsigned wc;
unsigned int wc;
int width;
if (!Flag(FUTFHACK) || *(const unsigned char *)src <= 0x7F ||
(len = mbxtowc(&wc, src)) == (size_t)-1) {
if (dst)
*dst = src + 1;
return (1);
}
if (!Flag(FUTFHACK) || (len = utf_mbtowc(&wc, src)) == (size_t)-1)
len = width = 1;
else
width = utf_wcwidth(wc);
if (dst)
*dst = src + len;
width = wcxwidth(wc);
return (width == -1 ? 2 : width);
}
int
ksh_mbswidth(const char *s)
utf_mbswidth(const char *s)
{
size_t len;
unsigned int wc;
@ -797,8 +791,8 @@ ksh_mbswidth(const char *s)
return (strlen(s));
while (*s)
if (((len = mbxtowc(&wc, s)) == (size_t)-1) ||
((cw = wcxwidth(wc)) == -1)) {
if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
((cw = utf_wcwidth(wc)) == -1)) {
s++;
width += 1;
} else {
@ -870,8 +864,8 @@ utf_getcpfromcols(char *p, int cols)
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.4 2006/11/01 20:01:20 tg Exp $");
static int
wcxwidth(unsigned c)
int
utf_wcwidth(unsigned int c)
{
static const struct cbset {
unsigned short first;
@ -953,8 +947,8 @@ wcxwidth(unsigned c)
/* --- begin of mbrtowc.c excerpt --- */
__RCSID("$miros: src/lib/libc/i18n/mbrtowc.c,v 1.13 2006/11/01 20:01:19 tg Exp $");
static size_t
mbxtowc(unsigned *dst, const char *src)
size_t
utf_mbtowc(unsigned int *dst, const char *src)
{
const unsigned char *s = (const unsigned char *)src;
unsigned int c, wc, count;
@ -1002,29 +996,28 @@ mbxtowc(unsigned *dst, const char *src)
/* --- begin of wcrtomb.c excerpt --- */
__RCSID("$miros: src/lib/libc/i18n/wcrtomb.c,v 1.14 2006/11/01 20:12:44 tg Exp $");
static size_t
wcxtomb(char *src, unsigned wc)
size_t
utf_wctomb(char *dst, unsigned int wc)
{
unsigned char *s = (unsigned char *)src;
unsigned int count;
unsigned char count, *d = (unsigned char *)dst;
if (wc > 0xFFFD)
wc = 0xFFFD;
if (wc < 0x80) {
count = 0;
*s++ = wc;
*d++ = wc;
} else if (wc < 0x0800) {
count = 1;
*s++ = (wc >> 6) | 0xC0;
*d++ = (wc >> 6) | 0xC0;
} else {
count = 2;
*s++ = (wc >> 12) | 0xE0;
*d++ = (wc >> 12) | 0xE0;
}
while (count) {
*s++ = ((wc >> (6 * --count)) & 0x3F) | 0x80;
*d++ = ((wc >> (6 * --count)) & 0x3F) | 0x80;
}
return ((char *)s - src);
return ((char *)d - dst);
}
/* --- end of wcrtomb.c excerpt --- */
@ -1435,17 +1428,22 @@ x_e_getmbc(char *sbuf)
return (-1);
if (Flag(FUTFHACK)) {
if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
buf[pos++] = c = x_e_getc();
c = x_e_getc();
if (c == -1)
return (-1);
if ((c & 0xC0) != 0x80) {
x_e_ungetc(c);
return (1);
}
buf[pos++] = c;
}
if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
/* XXX x_e_ungetc is one-octet only */
buf[pos++] = c = x_e_getc();
if (c == -1)
return (-1);
}
}
buf[pos] = '\0';
return (pos);
}
@ -1910,17 +1908,24 @@ x_zotc2(int c)
static void
x_zotc3(char **cp)
{
unsigned c = **(unsigned char **)cp;
unsigned char c = **(unsigned char **)cp;
if (c == 0xC2 && Flag(FUTFHACK)) {
unsigned char c2 = ((unsigned char *)*cp)[1];
if (c2 >= 0x80 && c2 < 0xA0) {
c = c2;
(*cp)++;
}
}
if (c == '\t') {
/* Kludge, tabs are always four spaces. */
x_e_puts(" ");
(*cp)++;
} else if (c < ' ' || c == 0x7f || (Flag(FUTFHACK) && c == 0xC2 &&
((unsigned char *)*cp)[1] < 0xA0 && mbxtowc(&c, *cp))) {
} else if (c < ' ' || (c >= 0x7F && c < 0xA0)) {
x_e_putc2('^');
x_e_putc2(UNCTRL(c));
*cp += c & 0x80 ? 2 : 1;
(*cp)++;
} else
x_e_putc3((const char **)cp);
}
@ -2372,35 +2377,35 @@ x_transpose(int c __unused)
* cursor, do not change cursor position
*/
x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpa, xcp) == (size_t)-1) {
if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
x_e_putc2(7);
return KSTD;
}
x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpb, xcp) == (size_t)-1) {
if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
x_e_putc2(7);
return KSTD;
}
wcxtomb(xcp, tmpa);
utf_wctomb(xcp, tmpa);
x_zotc3(&xcp);
wcxtomb(xcp, tmpb);
utf_wctomb(xcp, tmpb);
x_zotc3(&xcp);
} else {
/* GNU emacs style: Swap the characters before and under the
* cursor, move cursor position along one.
*/
if (mbxtowc(&tmpa, xcp) == (size_t)-1) {
if (utf_mbtowc(&tmpa, xcp) == (size_t)-1) {
x_e_putc2(7);
return KSTD;
}
x_bs2(xcp = utf_backch(xcp));
if (mbxtowc(&tmpb, xcp) == (size_t)-1) {
if (utf_mbtowc(&tmpb, xcp) == (size_t)-1) {
x_e_putc2(7);
return KSTD;
}
wcxtomb(xcp, tmpa);
utf_wctomb(xcp, tmpa);
x_zotc3(&xcp);
wcxtomb(xcp, tmpb);
utf_wctomb(xcp, tmpb);
x_zotc3(&xcp);
}
return KSTD;
@ -2957,13 +2962,13 @@ x_e_putc2(int c)
if (c < 0xA0)
c = 0xFFFD;
x = wcxtomb(utf_tmp, c);
x = utf_wctomb(utf_tmp, c);
x_putc(utf_tmp[0]);
if (x > 1)
x_putc(utf_tmp[1]);
if (x > 2)
x_putc(utf_tmp[2]);
width = wcxwidth(c);
width = utf_wcwidth(c);
} else
x_putc(c);
switch (c) {

6
exec.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.42 2008/04/19 17:21:53 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.43 2008/04/19 22:15:02 tg Exp $");
static int comexec(struct op *, struct tbl *volatile, const char **,
int volatile);
@ -1349,7 +1349,7 @@ pr_menu(const char *const *ap)
* get dimensions of the list
*/
for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) {
i = ksh_mbswidth(*pp);
i = utf_mbswidth(*pp);
nwidth = (i > nwidth) ? i : nwidth;
}
/*
@ -1388,7 +1388,7 @@ pr_list(char *const *ap)
int nwidth, i, n;
for (n = 0, nwidth = 0, pp = ap; *pp; n++, pp++) {
i = ksh_mbswidth(*pp);
i = utf_mbswidth(*pp);
nwidth = (i > nwidth) ? i : nwidth;
}
print_columns(shl_out, n, plain_fmt_entry, (const void *)ap,

11
expr.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.14 2008/03/28 13:33:37 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.15 2008/04/19 22:15:02 tg Exp $");
/* The order of these enums is constrained by the order of opinfo[] */
enum token {
@ -475,10 +475,15 @@ exprtoken(Expr_state *es)
afree(tvar, ATEMP);
}
es->tok = VAR;
} else if (c == '1' && cp[1] == '#') {
utf_cptradj(cp + 2, &cp);
tvar = str_nsave(es->tokp, cp - es->tokp, ATEMP);
goto process_tvar;
} else if (ksh_isdigit(c)) {
for (; c != '_' && (ksh_isalnux(c) || c == '#'); c = *cp++)
;
while (c != '_' && (ksh_isalnux(c) || c == '#'))
c = *cp++;
tvar = str_nsave(es->tokp, --cp - es->tokp, ATEMP);
process_tvar:
es->val = tempvar();
es->val->flag &= ~INTEGER;
es->val->type = 0;

View File

@ -5,7 +5,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.76 2008/04/16 21:56:01 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.77 2008/04/19 22:15:03 tg Exp $");
/* A leading = means assignments before command are kept;
* a leading * means a POSIX special builtin;
@ -1591,7 +1591,7 @@ c_umask(const char **wp)
p[-1] = '\0';
shprintf("%s\n", buf);
} else
shprintf("%#3.3o\n", (unsigned) old_umask);
shprintf("%#3.3o\n", (unsigned int)old_umask);
} else {
mode_t new_umask;

4
lex.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.57 2008/03/28 13:46:53 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.58 2008/04/19 22:15:03 tg Exp $");
/*
* states while lexing word
@ -1310,7 +1310,7 @@ dopprompt(const char *cp, int ntruncate, int doprint)
columns--;
} else if (*cp == delimiter)
indelimit = !indelimit;
else if (Flag(FUTFHACK) && ((unsigned)*cp > 0x7F)) {
else if (Flag(FUTFHACK) && ((unsigned char)*cp > 0x7F)) {
const char *cp2;
columns += utf_widthadj(cp, &cp2);
if (doprint && (indelimit ||

12
misc.c
View File

@ -6,7 +6,7 @@
#include <grp.h>
#endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.74 2008/04/19 17:21:54 tg Exp $\t"
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.75 2008/04/19 22:15:03 tg Exp $\t"
MKSH_SH_H_ID);
#undef USE_CHVT
@ -35,7 +35,7 @@ static char *do_phys_path(XString *, char *, const char *);
void
setctypes(const char *s, int t)
{
unsigned i;
unsigned int i;
if (t & C_IFS) {
for (i = 0; i < UCHAR_MAX + 1; i++)
@ -88,7 +88,7 @@ str_save(const char *s, Area *ap)
/* called from XcheckN() to grow buffer */
char *
Xcheck_grow_(XString *xsp, const char *xp, unsigned more)
Xcheck_grow_(XString *xsp, const char *xp, unsigned int more)
{
const char *old_beg = xsp->beg;
@ -185,7 +185,7 @@ options_fmt_entry(const void *arg, int i, char *buf, int buflen)
static void
printoptions(int verbose)
{
unsigned i;
unsigned int i;
if (verbose) {
struct options_info oi;
@ -216,7 +216,7 @@ printoptions(int verbose)
char *
getoptions(void)
{
unsigned i;
unsigned int i;
char m[(int) FNFLAGS + 1];
char *cp = m;
@ -1446,7 +1446,7 @@ stristr(const char *b, const char *l)
#if !HAVE_EXPSTMT
bool
ksh_isspace_(unsigned ksh_isspace_c)
ksh_isspace_(unsigned int ksh_isspace_c)
{
return ((ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) ||
(ksh_isspace_c == 0x20));

10
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.117 2008/04/19 21:18:52 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.118 2008/04/19 22:15:04 tg Exp $
.\" $OpenBSD: ksh.1,v 1.121 2008/03/21 12:51:19 millert Exp $
.\"-
.\" Try to make GNU groff and AT&T nroff more compatible
@ -2110,6 +2110,14 @@ extension, in all forms of arithmetic expressions,
except as numeric arguments to the
.Ic test
command.
As a special
.Nm mksh
extension, numbers to the base of one are treated as either (8-bit
transparent) ASCII or Unicode codepoints, depending on the shell's
.Ic utf8\-hack
flag (current setting).
In Unicode mode, raw octets are mapped into the range EF80..EFFF,
which is in the PUA and has been assigned by CSUR for this use.
.Pp
The operators are evaluated as follows:
.Bl -tag -width Ds -offset indent

View File

@ -1,5 +1,4 @@
/** $MirOS: src/bin/mksh/setmode.c,v 1.10 2008/04/06 23:27:19 tg Exp $ */
/** $miros: src/lib/libc/gen/setmode.c,v 1.9 2007/10/25 15:13:39 tg Exp $ */
/** $MirOS: src/bin/mksh/setmode.c,v 1.11 2008/04/19 22:15:05 tg Exp $ */
/* $OpenBSD: setmode.c,v 1.17 2005/08/08 08:05:34 espie Exp $ */
/* $NetBSD: setmode.c,v 1.15 1997/02/07 22:21:06 christos Exp $ */
@ -58,23 +57,21 @@
#endif
__SCCSID("@(#)setmode.c 8.2 (Berkeley) 3/25/94");
__RCSID("$MirOS: src/bin/mksh/setmode.c,v 1.10 2008/04/06 23:27:19 tg Exp $");
__RCSID("$miros: src/lib/libc/gen/setmode.c,v 1.9 2007/10/25 15:13:39 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/setmode.c,v 1.11 2008/04/19 22:15:05 tg Exp $");
__RCSID("$miros: src/lib/libc/gen/setmode.c,v 1.10 2008/04/19 16:27:23 tg Exp $");
/* for mksh */
#ifdef ksh_isdigit
#undef isdigit
#define isdigit ksh_isdigit
#define isdigit ksh_isdigit
#endif
/* for portability */
#ifndef S_ISTXT
#define S_ISTXT 0001000
#endif
#define SET_LEN 6 /* initial # of bitcmd struct to malloc */
#define SET_LEN_INCR 4 /* # of bitcmd structs to add as needed */
#define SET_LEN 6 /* initial # of bitcmd struct to malloc */
#define SET_LEN_INCR 4 /* # of bitcmd structs to add as needed */
typedef struct bitcmd {
char cmd;
@ -88,7 +85,7 @@ typedef struct bitcmd {
#define CMD2_OBITS 0x08
#define CMD2_UBITS 0x10
static BITCMD *addcmd(BITCMD *, int, int, int, unsigned);
static BITCMD *addcmd(BITCMD *, int, int, int, unsigned int);
static void compress_mode(BITCMD *);
#ifdef SETMODE_DEBUG
static void dumpmode(BITCMD *);
@ -364,7 +361,7 @@ setmode(const char *p)
}
static BITCMD *
addcmd(BITCMD *set, int op, int who, int oparg, unsigned mask)
addcmd(BITCMD *set, int op, int who, int oparg, unsigned int mask)
{
switch (op) {
case '=':

17
sh.h
View File

@ -8,8 +8,8 @@
/* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */
/* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.206 2008/04/19 17:21:54 tg Exp $"
#define MKSH_VERSION "R33 2008/04/16"
#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.207 2008/04/19 22:15:05 tg Exp $"
#define MKSH_VERSION "R33 2008/04/19"
#if HAVE_SYS_PARAM_H
#include <sys/param.h>
@ -156,12 +156,12 @@ typedef int bool;
#if HAVE_EXPSTMT
/* this macro must not evaluate its arguments several times */
#define ksh_isspace(c) ({ \
unsigned ksh_isspace_c = (c); \
unsigned int ksh_isspace_c = (c); \
(ksh_isspace_c >= 0x09 && ksh_isspace_c <= 0x0D) || \
(ksh_isspace_c == 0x20); \
})
#else
#define ksh_isspace(c) ksh_isspace_((unsigned)(c))
#define ksh_isspace(c) ksh_isspace_((unsigned int)(c))
#endif
#ifndef S_ISLNK
@ -1054,7 +1054,7 @@ typedef char *XStringP;
#define Xsavepos(xs, xp) ((xp) - (xs).beg)
#define Xrestpos(xs, xp, n) ((xs).beg + (n))
char *Xcheck_grow_(XString *, const char *, unsigned);
char *Xcheck_grow_(XString *, const char *, unsigned int);
/*
* expandable vector of generic pointers
@ -1219,9 +1219,12 @@ void x_init(void);
int x_read(char *, size_t);
int x_bind(const char *, const char *, int, int);
/* UTF-8 hack stuff */
size_t utf_mbtowc(unsigned int *, const char *);
size_t utf_wctomb(char *, unsigned int);
size_t utf_cptradj(const char *, const char **);
int ksh_mbswidth(const char *);
int utf_widthadj(const char *, const char **);
int utf_mbswidth(const char *);
int utf_wcwidth(unsigned int);
/* eval.c */
char *substitute(const char *, int);
char **eval(const char **, int);
@ -1439,7 +1442,7 @@ void simplify_path(char *);
char *get_phys_path(const char *);
void set_current_wd(char *);
#if !HAVE_EXPSTMT
bool ksh_isspace_(unsigned);
bool ksh_isspace_(unsigned int);
#endif
/* shf.c */
struct shf *shf_open(const char *, int, int, int);

38
shf.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.18 2008/04/19 17:21:55 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.19 2008/04/19 22:15:05 tg Exp $");
/* flags to shf_emptybuf() */
#define EB_READSW 0x01 /* about to switch to reading */
@ -31,7 +31,7 @@ shf_open(const char *name, int oflags, int mode, int sflags)
/* Done before open so if alloca fails, fd won't be lost. */
shf = (struct shf *) alloc(sizeof(struct shf) + bsize, ATEMP);
shf->areap = ATEMP;
shf->buf = (unsigned char *) &shf[1];
shf->buf = (unsigned char *)&shf[1];
shf->bsize = bsize;
shf->flags = SHF_ALLOCS;
/* Rest filled in by reopen. */
@ -92,13 +92,13 @@ shf_fdopen(int fd, int sflags, struct shf *shf)
if (shf) {
if (bsize) {
shf->buf = (unsigned char *) alloc(bsize, ATEMP);
shf->buf = (unsigned char *)alloc(bsize, ATEMP);
sflags |= SHF_ALLOCB;
} else
shf->buf = NULL;
} else {
shf = (struct shf *) alloc(sizeof(struct shf) + bsize, ATEMP);
shf->buf = (unsigned char *) &shf[1];
shf = (struct shf *)alloc(sizeof(struct shf) + bsize, ATEMP);
shf->buf = (unsigned char *)&shf[1];
sflags |= SHF_ALLOCS;
}
shf->areap = ATEMP;
@ -191,7 +191,7 @@ shf_sopen(char *buf, int bsize, int sflags, struct shf *shf)
buf = alloc(bsize, shf->areap);
}
shf->fd = -1;
shf->buf = shf->rp = shf->wp = (unsigned char *) buf;
shf->buf = shf->rp = shf->wp = (unsigned char *)buf;
shf->rnleft = bsize;
shf->rbsize = bsize;
shf->wnleft = bsize - 1; /* space for a '\0' */
@ -314,7 +314,7 @@ shf_emptybuf(struct shf *shf, int flags)
shf->flags &= ~SHF_READING;
}
if (shf->flags & SHF_STRING) {
unsigned char *nbuf;
unsigned char *nbuf;
/* Note that we assume SHF_ALLOCS is not set if SHF_ALLOCB
* is set... (changing the shf pointer could cause problems)
@ -323,7 +323,7 @@ shf_emptybuf(struct shf *shf, int flags)
!(shf->flags & SHF_ALLOCB))
return EOF;
/* allocate more space for buffer */
nbuf = (unsigned char *) aresize(shf->buf, shf->wbsize * 2,
nbuf = (unsigned char *)aresize(shf->buf, shf->wbsize * 2,
shf->areap);
shf->rp = nbuf + (shf->rp - shf->buf);
shf->wp = nbuf + (shf->wp - shf->buf);
@ -477,7 +477,7 @@ shf_getse(char *buf, int bsize, struct shf *shf)
return buf == orig_buf ? NULL : buf;
}
}
end = (unsigned char *) memchr((char *) shf->rp, '\n',
end = (unsigned char *)memchr((char *) shf->rp, '\n',
shf->rnleft);
ncopy = end ? end - shf->rp + 1 : shf->rnleft;
if (ncopy > bsize)
@ -850,7 +850,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
else if ((sizeof (int) < sizeof (long)) && (c == 'd'))
lnum = (long) va_arg(args, int);
else
lnum = va_arg(args, unsigned);
lnum = va_arg(args, unsigned int);
switch (c) {
case 'd':
case 'i':
@ -916,7 +916,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
case 's':
if (!(s = va_arg(args, const char *)))
s = "(null)";
len = ksh_mbswidth(s);
len = utf_mbswidth(s);
break;
case 'c':
@ -978,18 +978,14 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
field = 0;
if (precision > 0) {
const char *q = s;
nwritten += precision;
if (Flag(FUTFHACK)) {
const char *q = s;
while (precision-- > 0)
utf_cptradj(q, &q);
do {
shf_putc(*s, shf);
} while (++s < q);
} else while (precision-- > 0) {
while (precision-- > 0)
utf_cptradj(q, &q);
do {
shf_putc(*s, shf);
s++;
}
} while (++s < q);
}
if (field > 0) {
nwritten += field;

4
tree.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.15 2008/03/01 21:10:26 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.16 2008/04/19 22:15:06 tg Exp $");
#define INDENT 4
@ -375,7 +375,7 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
shf_fprintf(shf, "%d", va_arg(va, int));
break;
case 'u': /* decimal */
shf_fprintf(shf, "%u", va_arg(va, unsigned));
shf_fprintf(shf, "%u", va_arg(va, unsigned int));
break;
case 'T': /* format tree */
ptree(va_arg(va, struct op *), indent, shf);

65
var.c
View File

@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.53 2008/04/19 21:04:09 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.54 2008/04/19 22:15:06 tg Exp $");
/*
* Variables
@ -298,19 +298,31 @@ str_val(struct tbl *vp)
n = (vp->val.i < 0) ? -vp->val.i : vp->val.i;
base = (vp->type == 0) ? 10 : vp->type;
*--s = '\0';
do {
*--s = digits[n % base];
n /= base;
} while (n != 0);
if (base != 10) {
*--s = '#';
*--s = digits[base % 10];
if (base >= 10)
*--s = digits[base / 10];
if (base == 1) {
size_t sz = 1;
*(s = strbuf) = '1';
s[1] = '#';
if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
s[2] = n & 0xFF;
else
sz = utf_wctomb(s + 2, n);
s[2 + sz] = '\0';
} else {
*--s = '\0';
do {
*--s = digits[n % base];
n /= base;
} while (n != 0);
if (base != 10) {
*--s = '#';
*--s = digits[base % 10];
if (base >= 10)
*--s = digits[base / 10];
}
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
}
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
if (vp->flag & (RJUST|LJUST)) /* case already dealt with */
s = formatstr(vp, s);
else
@ -401,9 +413,8 @@ int
getint(struct tbl *vp, long int *nump, bool arith)
{
char *s;
int c;
int base, neg;
int have_base = 0;
int c, base, neg;
bool have_base = false;
long num;
if (vp->flag&SPECIAL)
@ -431,18 +442,28 @@ getint(struct tbl *vp, long int *nump, bool arith)
s++;
} else
base = 8;
have_base++;
have_base = true;
}
for (c = *s++; c ; c = *s++) {
if (c == '-') {
neg++;
continue;
} else if (c == '#') {
base = (int) num;
if (have_base || base < 2 || base > 36)
return -1;
base = (int)num;
if (have_base || base < 1 || base > 36)
return (-1);
if (base == 1) {
unsigned int wc;
if (!Flag(FUTFHACK))
wc = *(unsigned char *)s;
else if (utf_mbtowc(&wc, s) == (size_t)-1)
wc = 0xEF00 + *(unsigned char *)s;
*nump = (long)wc;
return (1);
}
num = 0;
have_base = 1;
have_base = true;
continue;
} else if (ksh_isdigit(c))
c -= '0';
@ -493,7 +514,7 @@ formatstr(struct tbl *vp, const char *s)
char *p, *q;
size_t psiz;
olen = ksh_mbswidth(s);
olen = utf_mbswidth(s);
if (vp->flag & (RJUST|LJUST)) {
if (!vp->u2.field) /* default field width */