• more unsigned → unsigned int

• more int → bool
• more regression tests: check that, if the current locale is a UTF-8
  one, the utf8-hack flag is really disabled at non-interactive startup
  and enabled at interactive startup
• make the mksh-local multibyte handling functions globally accessible,
  change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte
  sequence is invalid UTF-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the
  ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’,
  or do the same as print \x## or \u#### (depending on the utf8-hack flag),
  plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack”
  (print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use
  base-0 numbers which I had planned to use for raw octets first, as they are
  used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
This commit is contained in:
tg
2008-04-19 22:15:06 +00:00
parent 4ff0ca0f86
commit 9b62cf15bf
14 changed files with 364 additions and 128 deletions

65
var.c
View File

@@ -2,7 +2,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.53 2008/04/19 21:04:09 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.54 2008/04/19 22:15:06 tg Exp $");
/*
* Variables
@@ -298,19 +298,31 @@ str_val(struct tbl *vp)
n = (vp->val.i < 0) ? -vp->val.i : vp->val.i;
base = (vp->type == 0) ? 10 : vp->type;
*--s = '\0';
do {
*--s = digits[n % base];
n /= base;
} while (n != 0);
if (base != 10) {
*--s = '#';
*--s = digits[base % 10];
if (base >= 10)
*--s = digits[base / 10];
if (base == 1) {
size_t sz = 1;
*(s = strbuf) = '1';
s[1] = '#';
if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
s[2] = n & 0xFF;
else
sz = utf_wctomb(s + 2, n);
s[2 + sz] = '\0';
} else {
*--s = '\0';
do {
*--s = digits[n % base];
n /= base;
} while (n != 0);
if (base != 10) {
*--s = '#';
*--s = digits[base % 10];
if (base >= 10)
*--s = digits[base / 10];
}
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
}
if (!(vp->flag & INT_U) && vp->val.i < 0)
*--s = '-';
if (vp->flag & (RJUST|LJUST)) /* case already dealt with */
s = formatstr(vp, s);
else
@@ -401,9 +413,8 @@ int
getint(struct tbl *vp, long int *nump, bool arith)
{
char *s;
int c;
int base, neg;
int have_base = 0;
int c, base, neg;
bool have_base = false;
long num;
if (vp->flag&SPECIAL)
@@ -431,18 +442,28 @@ getint(struct tbl *vp, long int *nump, bool arith)
s++;
} else
base = 8;
have_base++;
have_base = true;
}
for (c = *s++; c ; c = *s++) {
if (c == '-') {
neg++;
continue;
} else if (c == '#') {
base = (int) num;
if (have_base || base < 2 || base > 36)
return -1;
base = (int)num;
if (have_base || base < 1 || base > 36)
return (-1);
if (base == 1) {
unsigned int wc;
if (!Flag(FUTFHACK))
wc = *(unsigned char *)s;
else if (utf_mbtowc(&wc, s) == (size_t)-1)
wc = 0xEF00 + *(unsigned char *)s;
*nump = (long)wc;
return (1);
}
num = 0;
have_base = 1;
have_base = true;
continue;
} else if (ksh_isdigit(c))
c -= '0';
@@ -493,7 +514,7 @@ formatstr(struct tbl *vp, const char *s)
char *p, *q;
size_t psiz;
olen = ksh_mbswidth(s);
olen = utf_mbswidth(s);
if (vp->flag & (RJUST|LJUST)) {
if (!vp->u2.field) /* default field width */