From cc725e67ca8039e625b4390b95b7680d24c9edd1 Mon Sep 17 00:00:00 2001
From: tg <tg@mirbsd.org>
Date: Fri, 5 May 2017 20:36:03 +0000
Subject: [PATCH] =?UTF-8?q?switch=20EBCDIC=20to=20=E2=80=9Cnega-UTF8?=
 =?UTF-8?q?=E2=80=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 edit.c | 28 +++++++++++++++++++---------
 eval.c |  6 +++---
 expr.c | 23 +++++++++++------------
 lex.c  | 16 ++++++++--------
 tree.c |  4 ++--
 var.c  | 10 ++++++----
 6 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/edit.c b/edit.c
index b8e93f2..47bec8f 100644
--- a/edit.c
+++ b/edit.c
@@ -28,7 +28,7 @@
 
 #ifndef MKSH_NO_CMDLINE_EDITING
 
-__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.335 2017/04/29 22:04:26 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.336 2017/05/05 20:36:00 tg Exp $");
 
 /*
  * in later versions we might use libtermcap for this, but since external
@@ -714,8 +714,8 @@ x_longest_prefix(int nwords, char * const * words)
 				break;
 			}
 	/* false for nwords==1 as 0 = words[0][prefix_len] then */
-	if (UTFMODE && prefix_len && (words[0][prefix_len] & 0xC0) == 0x80)
-		while (prefix_len && (words[0][prefix_len] & 0xC0) != 0xC0)
+	if (UTFMODE && prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) == 0x80)
+		while (prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) != 0xC0)
 			--prefix_len;
 	return (prefix_len);
 }
@@ -1186,17 +1186,19 @@ x_e_getmbc(char *sbuf)
 	if (c == -1)
 		return (-1);
 	if (UTFMODE) {
-		if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) {
+		if ((rtt2asc(buf[0]) >= (unsigned char)0xC2) &&
+		    (rtt2asc(buf[0]) < (unsigned char)0xF0)) {
 			c = x_e_getc();
 			if (c == -1)
 				return (-1);
-			if ((c & 0xC0) != 0x80) {
+			if ((rtt2asc(c) & 0xC0) != 0x80) {
 				x_e_ungetc(c);
 				return (1);
 			}
 			buf[pos++] = c;
 		}
-		if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) {
+		if ((rtt2asc(buf[0]) >= (unsigned char)0xE0) &&
+		    (rtt2asc(buf[0]) < (unsigned char)0xF0)) {
 			/* XXX x_e_ungetc is one-octet only */
 			buf[pos++] = c = x_e_getc();
 			if (c == -1)
@@ -1317,7 +1319,7 @@ x_insert(int c)
 		return (KSTD);
 	}
 	if (UTFMODE) {
-		if (((c & 0xC0) == 0x80) && left) {
+		if (((rtt2asc(c) & 0xC0) == 0x80) && left) {
 			str[pos++] = c;
 			if (!--left) {
 				str[pos] = '\0';
@@ -1614,7 +1616,7 @@ x_bs0(char *cp, char *lower_bound)
 {
 	if (UTFMODE)
 		while ((!lower_bound || (cp > lower_bound)) &&
-		    ((*(unsigned char *)cp & 0xC0) == 0x80))
+		    ((rtt2asc(*cp) & 0xC0) == 0x80))
 			--cp;
 	return (cp);
 }
@@ -1635,7 +1637,7 @@ x_size2(char *cp, char **dcp)
 {
 	uint8_t c = *(unsigned char *)cp;
 
-	if (UTFMODE && (c > 0x7F))
+	if (UTFMODE && (rtt2asc(c) > 0x7F))
 		return (utf_widthadj(cp, (const char **)dcp));
 	if (dcp)
 		*dcp = cp + 1;
@@ -2903,6 +2905,7 @@ x_e_putc2(int c)
 	if (ctype(c, C_CR | C_LF))
 		x_col = 0;
 	if (x_col < xx_cols) {
+#ifndef MKSH_EBCDIC
 		if (UTFMODE && (c > 0x7F)) {
 			char utf_tmp[3];
 			size_t x;
@@ -2917,6 +2920,7 @@ x_e_putc2(int c)
 				x_putc(utf_tmp[2]);
 			width = utf_wcwidth(c);
 		} else
+#endif
 			x_putc(c);
 		switch (c) {
 		case KSH_BEL:
@@ -2950,7 +2954,13 @@ x_e_putc3(const char **cp)
 			width = utf_widthadj(*cp, (const char **)&cp2);
 			if (cp2 == *cp + 1) {
 				(*cp)++;
+#ifdef MKSH_EBCDIC
+				x_putc(asc2rtt(0xEF));
+				x_putc(asc2rtt(0xBF));
+				x_putc(asc2rtt(0xBD));
+#else
 				shf_puts("\xEF\xBF\xBD", shl_out);
+#endif
 			} else
 				while (*cp < cp2)
 					x_putcf(*(*cp)++);
diff --git a/eval.c b/eval.c
index bccacf4..9cb2f24 100644
--- a/eval.c
+++ b/eval.c
@@ -23,7 +23,7 @@
 
 #include "sh.h"
 
-__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.212 2017/05/03 15:36:12 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.213 2017/05/05 20:36:01 tg Exp $");
 
 /*
  * string expansion
@@ -1174,7 +1174,7 @@ varsub(Expand *xp, const char *sp, const char *word,
 					if (!UTFMODE || (len = utf_mbtowc(&wc,
 					    s)) == (size_t)-1)
 						/* not UTFMODE or not UTF-8 */
-						wc = (unsigned char)(*s++);
+						wc = rtt2asc(*s++);
 					else
 						/* UTFMODE and UTF-8 */
 						s += len;
@@ -1522,7 +1522,7 @@ trimsub(char *str, char *pat, int how)
 				goto trimsub_match;
 			if (UTFMODE) {
 				char *op = p;
-				while ((p-- > str) && ((*p & 0xC0) == 0x80))
+				while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
 					;
 				if ((p < str) || (p + utf_ptradj(p) != op))
 					p = op - 1;
diff --git a/expr.c b/expr.c
index 3dd4fa4..6a18f01 100644
--- a/expr.c
+++ b/expr.c
@@ -23,7 +23,7 @@
 
 #include "sh.h"
 
-__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.96 2017/04/27 23:12:46 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.97 2017/05/05 20:36:02 tg Exp $");
 
 #define EXPRTOK_DEFNS
 #include "exprtok.h"
@@ -772,8 +772,7 @@ utf_ptradj(const char *src)
 {
 	register size_t n;
 
-	if (!UTFMODE ||
-	    *(const unsigned char *)(src) < 0xC2 ||
+	if (!UTFMODE || rtt2asc(*src) < 0xC2 ||
 	    (n = utf_mbtowc(NULL, src)) == (size_t)-1)
 		n = 1;
 	return (n);
@@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
 	const unsigned char *s = (const unsigned char *)src;
 	unsigned int c, wc;
 
-	if ((wc = *s++) < 0x80) {
+	if ((wc = ord(rtt2asc(*s++))) < 0x80) {
  out:
 		if (dst != NULL)
 			*dst = wc;
@@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
 
 	if (wc < 0xE0) {
 		wc = (wc & 0x1F) << 6;
-		if (((c = *s++) & 0xC0) != 0x80)
+		if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 			goto ilseq;
 		wc |= c & 0x3F;
 		goto out;
@@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src)
 
 	wc = (wc & 0x0F) << 12;
 
-	if (((c = *s++) & 0xC0) != 0x80)
+	if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 		goto ilseq;
 	wc |= (c & 0x3F) << 6;
 
-	if (((c = *s++) & 0xC0) != 0x80)
+	if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 		goto ilseq;
 	wc |= c & 0x3F;
 
@@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc)
 	unsigned char *d;
 
 	if (wc < 0x80) {
-		*dst = wc;
+		*dst = asc2rtt(wc);
 		return (1);
 	}
 
 	d = (unsigned char *)dst;
 	if (wc < 0x0800)
-		*d++ = (wc >> 6) | 0xC0;
+		*d++ = asc2rtt((wc >> 6) | 0xC0);
 	else {
-		*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
-		*d++ = ((wc >> 6) & 0x3F) | 0x80;
+		*d++ = asc2rtt(((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0);
+		*d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80);
 	}
-	*d++ = (wc & 0x3F) | 0x80;
+	*d++ = asc2rtt((wc & 0x3F) | 0x80);
 	return ((char *)d - dst);
 }
 
diff --git a/lex.c b/lex.c
index 987f547..d835abd 100644
--- a/lex.c
+++ b/lex.c
@@ -23,7 +23,7 @@
 
 #include "sh.h"
 
-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.237 2017/04/28 00:38:31 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.238 2017/05/05 20:36:02 tg Exp $");
 
 /*
  * states while lexing word
@@ -1536,7 +1536,7 @@ pprompt(const char *cp, int ntruncate)
 				columns--;
 		} else if (*cp == delimiter)
 			indelimit = !indelimit;
-		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
+		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
 			const char *cp2;
 			columns += utf_widthadj(cp, &cp2);
 			if (doprint && (indelimit ||
@@ -1754,19 +1754,19 @@ yyskiputf8bom(void)
 {
 	int c;
 
-	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
+	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
 		ungetsc_i(c);
 		return;
 	}
-	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
+	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
 		ungetsc_i(c);
-		ungetsc_i(0xEF);
+		ungetsc_i(asc2rtt(0xEF));
 		return;
 	}
-	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
+	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
 		ungetsc_i(c);
-		ungetsc_i(0xBB);
-		ungetsc_i(0xEF);
+		ungetsc_i(asc2rtt(0xBB));
+		ungetsc_i(asc2rtt(0xEF));
 		return;
 	}
 	UTFMODE |= 8;
diff --git a/tree.c b/tree.c
index c1c4090..a732c72 100644
--- a/tree.c
+++ b/tree.c
@@ -23,7 +23,7 @@
 
 #include "sh.h"
 
-__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.91 2017/04/28 03:28:19 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.92 2017/05/05 20:36:03 tg Exp $");
 
 #define INDENT	8
 
@@ -805,7 +805,7 @@ vistree(char *dst, size_t sz, struct op *t)
 			goto vist_out;
 		*dst++ = '^';
 		c = ksh_unctrl(c);
-	} else if (UTFMODE && c > 0x7F) {
+	} else if (UTFMODE && rtt2asc(c) > 0x7F) {
 		/* better not try to display broken multibyte chars */
 		/* also go easy on the Unicode: no U+FFFD here */
 		c = '?';
diff --git a/var.c b/var.c
index 73dac13..7fd35f0 100644
--- a/var.c
+++ b/var.c
@@ -28,7 +28,7 @@
 #include <sys/sysctl.h>
 #endif
 
-__RCSID("$MirOS: src/bin/mksh/var.c,v 1.217 2017/04/29 22:04:31 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/var.c,v 1.218 2017/05/05 20:36:03 tg Exp $");
 
 /*-
  * Variables
@@ -414,9 +414,11 @@ str_val(struct tbl *vp)
 
 			*(s = strbuf) = '1';
 			s[1] = '#';
-			if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
+			if (!UTFMODE)
+				s[2] = (unsigned char)n;
+			else if ((n & 0xFF80) == 0xEF80)
 				/* OPTU-16 -> raw octet */
-				s[2] = n & 0xFF;
+				s[2] = asc2rtt(n & 0xFF);
 			else
 				sz = utf_wctomb(s + 2, n);
 			s[2 + sz] = '\0';
@@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
 					 * the same as 1#\x80 does, thus is
 					 * not round-tripping correctly XXX)
 					 */
-					wc = 0xEF00 + *(const unsigned char *)s;
+					wc = 0xEF00 + rtt2asc(*s);
 				nump->u = (mksh_uari_t)wc;
 				return (1);
 			} else if (base > 36)