From cc725e67ca8039e625b4390b95b7680d24c9edd1 Mon Sep 17 00:00:00 2001 From: tg Date: Fri, 5 May 2017 20:36:03 +0000 Subject: [PATCH] =?UTF-8?q?switch=20EBCDIC=20to=20=E2=80=9Cnega-UTF8?= =?UTF-8?q?=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- edit.c | 28 +++++++++++++++++++--------- eval.c | 6 +++--- expr.c | 23 +++++++++++------------ lex.c | 16 ++++++++-------- tree.c | 4 ++-- var.c | 10 ++++++---- 6 files changed, 49 insertions(+), 38 deletions(-) diff --git a/edit.c b/edit.c index b8e93f2..47bec8f 100644 --- a/edit.c +++ b/edit.c @@ -28,7 +28,7 @@ #ifndef MKSH_NO_CMDLINE_EDITING -__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.335 2017/04/29 22:04:26 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.336 2017/05/05 20:36:00 tg Exp $"); /* * in later versions we might use libtermcap for this, but since external @@ -714,8 +714,8 @@ x_longest_prefix(int nwords, char * const * words) break; } /* false for nwords==1 as 0 = words[0][prefix_len] then */ - if (UTFMODE && prefix_len && (words[0][prefix_len] & 0xC0) == 0x80) - while (prefix_len && (words[0][prefix_len] & 0xC0) != 0xC0) + if (UTFMODE && prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) == 0x80) + while (prefix_len && (rtt2asc(words[0][prefix_len]) & 0xC0) != 0xC0) --prefix_len; return (prefix_len); } @@ -1186,17 +1186,19 @@ x_e_getmbc(char *sbuf) if (c == -1) return (-1); if (UTFMODE) { - if ((buf[0] >= 0xC2) && (buf[0] < 0xF0)) { + if ((rtt2asc(buf[0]) >= (unsigned char)0xC2) && + (rtt2asc(buf[0]) < (unsigned char)0xF0)) { c = x_e_getc(); if (c == -1) return (-1); - if ((c & 0xC0) != 0x80) { + if ((rtt2asc(c) & 0xC0) != 0x80) { x_e_ungetc(c); return (1); } buf[pos++] = c; } - if ((buf[0] >= 0xE0) && (buf[0] < 0xF0)) { + if ((rtt2asc(buf[0]) >= (unsigned char)0xE0) && + (rtt2asc(buf[0]) < (unsigned char)0xF0)) { /* XXX x_e_ungetc is one-octet only */ buf[pos++] = c = x_e_getc(); if (c == -1) @@ -1317,7 +1319,7 @@ x_insert(int c) return (KSTD); } 
if (UTFMODE) { - if (((c & 0xC0) == 0x80) && left) { + if (((rtt2asc(c) & 0xC0) == 0x80) && left) { str[pos++] = c; if (!--left) { str[pos] = '\0'; @@ -1614,7 +1616,7 @@ x_bs0(char *cp, char *lower_bound) { if (UTFMODE) while ((!lower_bound || (cp > lower_bound)) && - ((*(unsigned char *)cp & 0xC0) == 0x80)) + ((rtt2asc(*cp) & 0xC0) == 0x80)) --cp; return (cp); } @@ -1635,7 +1637,7 @@ x_size2(char *cp, char **dcp) { uint8_t c = *(unsigned char *)cp; - if (UTFMODE && (c > 0x7F)) + if (UTFMODE && (rtt2asc(c) > 0x7F)) return (utf_widthadj(cp, (const char **)dcp)); if (dcp) *dcp = cp + 1; @@ -2903,6 +2905,7 @@ x_e_putc2(int c) if (ctype(c, C_CR | C_LF)) x_col = 0; if (x_col < xx_cols) { +#ifndef MKSH_EBCDIC if (UTFMODE && (c > 0x7F)) { char utf_tmp[3]; size_t x; @@ -2917,6 +2920,7 @@ x_e_putc2(int c) x_putc(utf_tmp[2]); width = utf_wcwidth(c); } else +#endif x_putc(c); switch (c) { case KSH_BEL: @@ -2950,7 +2954,13 @@ x_e_putc3(const char **cp) width = utf_widthadj(*cp, (const char **)&cp2); if (cp2 == *cp + 1) { (*cp)++; +#ifdef MKSH_EBCDIC + x_putc(asc2rtt(0xEF)); + x_putc(asc2rtt(0xBF)); + x_putc(asc2rtt(0xBD)); +#else shf_puts("\xEF\xBF\xBD", shl_out); +#endif } else while (*cp < cp2) x_putcf(*(*cp)++); diff --git a/eval.c b/eval.c index bccacf4..9cb2f24 100644 --- a/eval.c +++ b/eval.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.212 2017/05/03 15:36:12 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.213 2017/05/05 20:36:01 tg Exp $"); /* * string expansion @@ -1174,7 +1174,7 @@ varsub(Expand *xp, const char *sp, const char *word, if (!UTFMODE || (len = utf_mbtowc(&wc, s)) == (size_t)-1) /* not UTFMODE or not UTF-8 */ - wc = (unsigned char)(*s++); + wc = rtt2asc(*s++); else /* UTFMODE and UTF-8 */ s += len; @@ -1522,7 +1522,7 @@ trimsub(char *str, char *pat, int how) goto trimsub_match; if (UTFMODE) { char *op = p; - while ((p-- > str) && ((*p & 0xC0) == 0x80)) + while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80)) ; if ((p < 
str) || (p + utf_ptradj(p) != op)) p = op - 1; diff --git a/expr.c b/expr.c index 3dd4fa4..6a18f01 100644 --- a/expr.c +++ b/expr.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.96 2017/04/27 23:12:46 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.97 2017/05/05 20:36:02 tg Exp $"); #define EXPRTOK_DEFNS #include "exprtok.h" @@ -772,8 +772,7 @@ utf_ptradj(const char *src) { register size_t n; - if (!UTFMODE || - *(const unsigned char *)(src) < 0xC2 || + if (!UTFMODE || rtt2asc(*src) < 0xC2 || (n = utf_mbtowc(NULL, src)) == (size_t)-1) n = 1; return (n); @@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src) const unsigned char *s = (const unsigned char *)src; unsigned int c, wc; - if ((wc = *s++) < 0x80) { + if ((wc = ord(rtt2asc(*s++))) < 0x80) { out: if (dst != NULL) *dst = wc; @@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src) if (wc < 0xE0) { wc = (wc & 0x1F) << 6; - if (((c = *s++) & 0xC0) != 0x80) + if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80) goto ilseq; wc |= c & 0x3F; goto out; @@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src) wc = (wc & 0x0F) << 12; - if (((c = *s++) & 0xC0) != 0x80) + if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80) goto ilseq; wc |= (c & 0x3F) << 6; - if (((c = *s++) & 0xC0) != 0x80) + if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80) goto ilseq; wc |= c & 0x3F; @@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc) unsigned char *d; if (wc < 0x80) { - *dst = wc; + *dst = asc2rtt(wc); return (1); } d = (unsigned char *)dst; if (wc < 0x0800) - *d++ = (wc >> 6) | 0xC0; + *d++ = asc2rtt((wc >> 6) | 0xC0); else { - *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0; - *d++ = ((wc >> 6) & 0x3F) | 0x80; + *d++ = asc2rtt(((wc = wc > 0xFFFD ? 
0xFFFD : wc) >> 12) | 0xE0); + *d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80); } - *d++ = (wc & 0x3F) | 0x80; + *d++ = asc2rtt((wc & 0x3F) | 0x80); return ((char *)d - dst); } diff --git a/lex.c b/lex.c index 987f547..d835abd 100644 --- a/lex.c +++ b/lex.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.237 2017/04/28 00:38:31 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.238 2017/05/05 20:36:02 tg Exp $"); /* * states while lexing word @@ -1536,7 +1536,7 @@ pprompt(const char *cp, int ntruncate) columns--; } else if (*cp == delimiter) indelimit = !indelimit; - else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { + else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) { const char *cp2; columns += utf_widthadj(cp, &cp2); if (doprint && (indelimit || @@ -1754,19 +1754,19 @@ yyskiputf8bom(void) { int c; - if ((unsigned char)(c = o_getsc_u()) != 0xEF) { + if (rtt2asc((c = o_getsc_u())) != 0xEF) { ungetsc_i(c); return; } - if ((unsigned char)(c = o_getsc_u()) != 0xBB) { + if (rtt2asc((c = o_getsc_u())) != 0xBB) { ungetsc_i(c); - ungetsc_i(0xEF); + ungetsc_i(asc2rtt(0xEF)); return; } - if ((unsigned char)(c = o_getsc_u()) != 0xBF) { + if (rtt2asc((c = o_getsc_u())) != 0xBF) { ungetsc_i(c); - ungetsc_i(0xBB); - ungetsc_i(0xEF); + ungetsc_i(asc2rtt(0xBB)); + ungetsc_i(asc2rtt(0xEF)); return; } UTFMODE |= 8; diff --git a/tree.c b/tree.c index c1c4090..a732c72 100644 --- a/tree.c +++ b/tree.c @@ -23,7 +23,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.91 2017/04/28 03:28:19 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.92 2017/05/05 20:36:03 tg Exp $"); #define INDENT 8 @@ -805,7 +805,7 @@ vistree(char *dst, size_t sz, struct op *t) goto vist_out; *dst++ = '^'; c = ksh_unctrl(c); - } else if (UTFMODE && c > 0x7F) { + } else if (UTFMODE && rtt2asc(c) > 0x7F) { /* better not try to display broken multibyte chars */ /* also go easy on the Unicode: no U+FFFD here */ c = '?'; diff --git a/var.c b/var.c index 73dac13..7fd35f0 100644 --- 
a/var.c +++ b/var.c @@ -28,7 +28,7 @@ #include <sys/sysctl.h> #endif -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.217 2017/04/29 22:04:31 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.218 2017/05/05 20:36:03 tg Exp $"); /*- * Variables @@ -414,9 +414,11 @@ str_val(struct tbl *vp) *(s = strbuf) = '1'; s[1] = '#'; - if (!UTFMODE || ((n & 0xFF80) == 0xEF80)) + if (!UTFMODE) + s[2] = (unsigned char)n; + else if ((n & 0xFF80) == 0xEF80) /* OPTU-16 -> raw octet */ - s[2] = n & 0xFF; + s[2] = asc2rtt(n & 0xFF); else sz = utf_wctomb(s + 2, n); s[2 + sz] = '\0'; @@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith) * the same as 1#\x80 does, thus is * not round-tripping correctly XXX) */ - wc = 0xEF00 + *(const unsigned char *)s; + wc = 0xEF00 + rtt2asc(*s); nump->u = (mksh_uari_t)wc; return (1); } else if (base > 36)