• more unsigned → unsigned int

• more int → bool
• more regression tests: check if the utf8-hack flag is really disabled at non-interactive startup, enabled at interactive startup, if the current locale is a UTF-8 one
• make the mksh-local multibyte handling functions globally accessible, change their names, syntax and semantics a little (XXX more work needed)
• optimise
• utf_wctomb: src → dst, as we’re writing to that char array (pasto?)
• edit.c:x_e_getmbc(): if the second byte of a 2- or 3-byte multibyte sequence is invalid utf-8, ungetc it (not possible for the 3rd byte yet)
• edit.c:x_zotc3(): easier (and faster) handling of UTF-8
• implement, document and test for base-1 numbers: they just get the ASCII (8-bit) or Unicode (UTF-8) value of the octet(s) after the ‘1#’, or do the same as print \x## or \u#### (depending on the utf8-hack flag), plus support the PUA assignment of EF80‥EFFF for the MirBSD encoding “hack” (print doesn’t, as it has \x## and \u#### to distinguish, but we cannot use base-0 numbers which I had planned to use for raw octets first, as they are used internally): http://thread.gmane.org/gmane.os.miros.general/7938
• as an application example, add a hexdumper to the regression tests ☺
2008-04-19 22:15:06 +00:00
parent 4ff0ca0f86
commit 9b62cf15bf
14 changed files with 364 additions and 128 deletions
--- a/var.c
+++ b/var.c
@@ -2,7 +2,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/var.c,v 1.53 2008/04/19 21:04:09 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/var.c,v 1.54 2008/04/19 22:15:06 tg Exp $");

 /*
 * Variables
@@ -298,19 +298,31 @@ str_val(struct tbl *vp)
 			n = (vp->val.i < 0) ? -vp->val.i : vp->val.i;
 		base = (vp->type == 0) ? 10 : vp->type;

-		*--s = '\0';
-		do {
-			*--s = digits[n % base];
-			n /= base;
-		} while (n != 0);
-		if (base != 10) {
-			*--s = '#';
-			*--s = digits[base % 10];
-			if (base >= 10)
-				*--s = digits[base / 10];
+		if (base == 1) {
+			size_t sz = 1;
+
+			*(s = strbuf) = '1';
+			s[1] = '#';
+			if (!Flag(FUTFHACK) || ((n & 0xFF80) == 0xEF80))
+				s[2] = n & 0xFF;
+			else
+				sz = utf_wctomb(s + 2, n);
+			s[2 + sz] = '\0';
+		} else {
+			*--s = '\0';
+			do {
+				*--s = digits[n % base];
+				n /= base;
+			} while (n != 0);
+			if (base != 10) {
+				*--s = '#';
+				*--s = digits[base % 10];
+				if (base >= 10)
+					*--s = digits[base / 10];
+			}
+			if (!(vp->flag & INT_U) && vp->val.i < 0)
+				*--s = '-';
 		}
-		if (!(vp->flag & INT_U) && vp->val.i < 0)
-			*--s = '-';
 		if (vp->flag & (RJUST|LJUST)) /* case already dealt with */
 			s = formatstr(vp, s);
 		else
@@ -401,9 +413,8 @@ int
 getint(struct tbl *vp, long int *nump, bool arith)
 {
 	char *s;
-	int c;
-	int base, neg;
-	int have_base = 0;
+	int c, base, neg;
+	bool have_base = false;
 	long num;

 	if (vp->flag&SPECIAL)
@@ -431,18 +442,28 @@ getint(struct tbl *vp, long int *nump, bool arith)
 				s++;
 		} else
 			base = 8;
-		have_base++;
+		have_base = true;
 	}
 	for (c = *s++; c ; c = *s++) {
 		if (c == '-') {
 			neg++;
 			continue;
 		} else if (c == '#') {
-			base = (int) num;
-			if (have_base || base < 2 || base > 36)
-				return -1;
+			base = (int)num;
+			if (have_base || base < 1 || base > 36)
+				return (-1);
+			if (base == 1) {
+				unsigned int wc;
+
+				if (!Flag(FUTFHACK))
+					wc = *(unsigned char *)s;
+				else if (utf_mbtowc(&wc, s) == (size_t)-1)
+					wc = 0xEF00 + *(unsigned char *)s;
+				*nump = (long)wc;
+				return (1);
+			}
 			num = 0;
-			have_base = 1;
+			have_base = true;
 			continue;
 		} else if (ksh_isdigit(c))
 			c -= '0';
@@ -493,7 +514,7 @@ formatstr(struct tbl *vp, const char *s)
 	char *p, *q;
 	size_t psiz;

-	olen = ksh_mbswidth(s);
+	olen = utf_mbswidth(s);

 	if (vp->flag & (RJUST|LJUST)) {
 		if (!vp->u2.field)	/* default field width */