diff --git a/edit.c b/edit.c index ca52a3e..10d5da6 100644 --- a/edit.c +++ b/edit.c @@ -25,7 +25,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.182 2009/09/24 17:15:30 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.183 2009/09/26 04:01:31 tg Exp $"); /* tty driver characters we are interested in */ typedef struct { @@ -78,7 +78,6 @@ static int x_command_glob(int, const char *, int, char ***); static int x_locate_word(const char *, int, int, int *, bool *); static int x_e_getmbc(char *); -static int utf_wcwidth(unsigned int); /* +++ generic editing functions +++ */ @@ -732,232 +731,6 @@ x_escape(const char *s, size_t len, int (*putbuf_func)(const char *, size_t)) return (rval); } -/* UTF-8 hack: high-level functions */ - -int -utf_widthadj(const char *src, const char **dst) -{ - size_t len; - unsigned int wc; - int width; - - if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || - wc == 0) - len = width = 1; - else if ((width = utf_wcwidth(wc)) < 0) - /* XXX use 2 for x_zotc3 here? */ - width = 1; - - if (dst) - *dst = src + len; - return (width); -} - -int -utf_mbswidth(const char *s) -{ - size_t len; - unsigned int wc; - int width = 0, cw; - - if (!UTFMODE) - return (strlen(s)); - - while (*s) - if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || - ((cw = utf_wcwidth(wc)) == -1)) { - s++; - width += 1; - } else { - s += len; - width += cw; - } - return (width); -} - -const char * -utf_skipcols(const char *p, int cols) -{ - int c = 0; - - while (c < cols) - c += utf_widthadj(p, &p); - return (p); -} - -size_t -utf_ptradj(const char *src) -{ - register size_t n; - - if (!UTFMODE || - *(const unsigned char *)(src) < 0xC2 || - (n = utf_mbtowc(NULL, src)) == (size_t)-1) - n = 1; - return (n); -} - -/* UTF-8 hack: low-level functions */ - -/* --- begin of wcwidth.c excerpt --- */ -/*- - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * - * Permission to use, copy, modify, and distribute this software - * for any purpose and without fee is hereby granted. The author - * disclaims all warranties with regard to this software. - */ - -__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $"); - -static int -utf_wcwidth(unsigned int c) -{ - static const struct cbset { - unsigned short first; - unsigned short last; - } comb[] = { - { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, - { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, - { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, - { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, - { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, - { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, - { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, - { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, - { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, - { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, - { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, - { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, - { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, - { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, - { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, - { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, - { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, - { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, - { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, - { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, - { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, - { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, - { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, - { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, - { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, - { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, - { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, - { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, - { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, - { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, - { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, - { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, - { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, - { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, - { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, - { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, - { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, - { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, - { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, - { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, - { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, - { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, - { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB } - }; - size_t min = 0, mid, max = NELEM(comb) - 1; - - /* test for 8-bit control characters */ - if (c < 32 || (c >= 0x7f && c < 0xa0)) - return (c ? -1 : 0); - - /* binary search in table of non-spacing characters */ - if (c >= comb[0].first && c <= comb[max].last) - while (max >= min) { - mid = (min + max) / 2; - if (c > comb[mid].last) - min = mid + 1; - else if (c < comb[mid].first) - max = mid - 1; - else - return (0); - } - - /* if we arrive here, c is not a combining or C0/C1 control char */ - return ((c >= 0x1100 && ( - c <= 0x115f || /* Hangul Jamo init. consonants */ - c == 0x2329 || c == 0x232a || - (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */ - (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ - (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ - (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ - (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ - (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ - (c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1); -} -/* --- end of wcwidth.c excerpt --- */ - -/* +++ CESU-8 multibyte and wide character conversion crafted for mksh +++ */ - -size_t -utf_mbtowc(unsigned int *dst, const char *src) -{ - const unsigned char *s = (const unsigned char *)src; - unsigned int c, wc; - - if ((wc = *s++) < 0x80) { - out: - if (dst != NULL) - *dst = wc; - return (wc ? ((const char *)s - src) : 0); - } - if (wc < 0xC2 || wc >= 0xF0) - /* < 0xC0: spurious second byte */ - /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */ - /* > 0xEF: beyond BMP */ - goto ilseq; - - if (wc < 0xE0) { - wc = (wc & 0x1F) << 6; - if (((c = *s++) & 0xC0) != 0x80) - goto ilseq; - wc |= c & 0x3F; - goto out; - } - - wc = (wc & 0x0F) << 12; - - if (((c = *s++) & 0xC0) != 0x80) - goto ilseq; - wc |= (c & 0x3F) << 6; - - if (((c = *s++) & 0xC0) != 0x80) - goto ilseq; - wc |= c & 0x3F; - - /* Check for non-minimalistic mapping error in 3-byte seqs */ - if (wc >= 0x0800 && wc <= 0xFFFD) - goto out; - ilseq: - return ((size_t)(-1)); -} - -size_t -utf_wctomb(char *dst, unsigned int wc) -{ - unsigned char *d; - - if (wc < 0x80) { - *dst = wc; - return (1); - } - - d = (unsigned char *)dst; - if (wc < 0x0800) - *d++ = (wc >> 6) | 0xC0; - else { - *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0; - *d++ = ((wc >> 6) & 0x3F) | 0x80; - } - *d++ = (wc & 0x3F) | 0x80; - return ((char *)d - dst); -} /* +++ emacs editing mode +++ */ diff --git a/expr.c b/expr.c index 17da216..f3fa64f 100644 --- a/expr.c +++ b/expr.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.35 2009/09/23 18:04:55 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.36 2009/09/26 04:01:32 tg Exp $"); /* The order of these enums is constrained by the order of opinfo[] */ enum token { @@ -656,3 +656,236 @@ intvar(Expr_state *es, struct tbl *vp) } return (vq); } + + +/* + * UTF-8 support code: high-level functions + */ + +int +utf_widthadj(const char *src, const char **dst) +{ + size_t len; + unsigned int wc; + int width; + + if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 || + wc == 0) + len = width = 1; + else if ((width = utf_wcwidth(wc)) < 0) + /* XXX use 2 for x_zotc3 here? */ + width = 1; + + if (dst) + *dst = src + len; + return (width); +} + +int +utf_mbswidth(const char *s) +{ + size_t len; + unsigned int wc; + int width = 0, cw; + + if (!UTFMODE) + return (strlen(s)); + + while (*s) + if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || + ((cw = utf_wcwidth(wc)) == -1)) { + s++; + width += 1; + } else { + s += len; + width += cw; + } + return (width); +} + +const char * +utf_skipcols(const char *p, int cols) +{ + int c = 0; + + while (c < cols) + c += utf_widthadj(p, &p); + return (p); +} + +size_t +utf_ptradj(const char *src) +{ + register size_t n; + + if (!UTFMODE || + *(const unsigned char *)(src) < 0xC2 || + (n = utf_mbtowc(NULL, src)) == (size_t)-1) + n = 1; + return (n); +} + +/* + * UTF-8 support code: low-level functions + */ + +/* CESU-8 multibyte and wide character conversion crafted for mksh */ + +size_t +utf_mbtowc(unsigned int *dst, const char *src) +{ + const unsigned char *s = (const unsigned char *)src; + unsigned int c, wc; + + if ((wc = *s++) < 0x80) { + out: + if (dst != NULL) + *dst = wc; + return (wc ? ((const char *)s - src) : 0); + } + if (wc < 0xC2 || wc >= 0xF0) + /* < 0xC0: spurious second byte */ + /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */ + /* > 0xEF: beyond BMP */ + goto ilseq; + + if (wc < 0xE0) { + wc = (wc & 0x1F) << 6; + if (((c = *s++) & 0xC0) != 0x80) + goto ilseq; + wc |= c & 0x3F; + goto out; + } + + wc = (wc & 0x0F) << 12; + + if (((c = *s++) & 0xC0) != 0x80) + goto ilseq; + wc |= (c & 0x3F) << 6; + + if (((c = *s++) & 0xC0) != 0x80) + goto ilseq; + wc |= c & 0x3F; + + /* Check for non-minimalistic mapping error in 3-byte seqs */ + if (wc >= 0x0800 && wc <= 0xFFFD) + goto out; + ilseq: + return ((size_t)(-1)); +} + +size_t +utf_wctomb(char *dst, unsigned int wc) +{ + unsigned char *d; + + if (wc < 0x80) { + *dst = wc; + return (1); + } + + d = (unsigned char *)dst; + if (wc < 0x0800) + *d++ = (wc >> 6) | 0xC0; + else { + *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0; + *d++ = ((wc >> 6) & 0x3F) | 0x80; + } + *d++ = (wc & 0x3F) | 0x80; + return ((char *)d - dst); +} + + +/* --- begin of wcwidth.c excerpt --- */ +/*- + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. + */ + +__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $"); + +int +utf_wcwidth(unsigned int c) +{ + static const struct cbset { + unsigned short first; + unsigned short last; + } const comb[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, + { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, + { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, + { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, + { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, + { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, + { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, + { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, + { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB } + }; + size_t min = 0, mid, max = NELEM(comb) - 1; + + /* test for 8-bit control characters */ + if (c < 32 || (c >= 0x7f && c < 0xa0)) + return (c ? -1 : 0); + + /* binary search in table of non-spacing characters */ + if (c >= comb[0].first && c <= comb[max].last) + while (max >= min) { + mid = (min + max) / 2; + if (c > comb[mid].last) + min = mid + 1; + else if (c < comb[mid].first) + max = mid - 1; + else + return (0); + } + + /* if we arrive here, c is not a combining or C0/C1 control char */ + return ((c >= 0x1100 && ( + c <= 0x115f || /* Hangul Jamo init. consonants */ + c == 0x2329 || c == 0x232a || + (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */ + (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ + (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ + (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */ + (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ + (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ + (c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1); +} +/* --- end of wcwidth.c excerpt --- */ diff --git a/sh.h b/sh.h index ec4a087..6e79e9d 100644 --- a/sh.h +++ b/sh.h @@ -134,7 +134,7 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.349 2009/09/26 03:40:01 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.350 2009/09/26 04:01:33 tg Exp $"); #endif #define MKSH_VERSION "R39 2009/09/25" @@ -1321,13 +1321,6 @@ void afree(void *, Area *); /* can take NULL */ /* edit.c */ void x_init(void); int x_read(char *, size_t); -/* UTF-8 stuff */ -size_t utf_mbtowc(unsigned int *, const char *); -size_t utf_wctomb(char *, unsigned int); -int utf_widthadj(const char *, const char **); -int utf_mbswidth(const char *); -const char *utf_skipcols(const char *, int); -size_t utf_ptradj(const char *); /* eval.c */ char *substitute(const char *, int); char **eval(const char **, int); @@ -1351,6 +1344,14 @@ int pr_list(char *const *); /* expr.c */ int evaluate(const char *, mksh_ari_t *, int, bool); int v_evaluate(struct tbl *, const char *, volatile int, bool); +/* UTF-8 stuff */ +size_t utf_mbtowc(unsigned int *, const char *); +size_t utf_wctomb(char *, unsigned int); +int utf_widthadj(const char *, const char **); +int utf_mbswidth(const char *); +const char *utf_skipcols(const char *, int); +size_t utf_ptradj(const char *); +int utf_wcwidth(unsigned int); /* funcs.c */ int c_hash(const char **); int c_cd(const char **); diff --git a/var.c b/var.c index cdd737d..71f7c0d 100644 --- a/var.c +++ b/var.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.92 2009/09/26 03:40:02 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.93 2009/09/26 04:01:34 tg Exp $"); /* * Variables @@ -357,6 +357,7 @@ str_val(struct tbl *vp) *(s = strbuf) = '1'; s[1] = '#'; if (!UTFMODE || ((n & 0xFF80) == 0xEF80)) + /* OPTU-16 -> raw octet */ s[2] = n & 0xFF; else sz = utf_wctomb(s + 2, n); @@ -511,6 +512,12 @@ getint(struct tbl *vp, mksh_ari_t *nump, bool arith) if (!UTFMODE) wc = *(unsigned char *)s; else if (utf_mbtowc(&wc, s) == (size_t)-1) + /* OPTU-8 -> OPTU-16 */ + /* + * (with a twist: 1#\uEF80 converts + * the same as 1#\x80 does, thus is + * not round-tripping correctly XXX) + */ wc = 0xEF00 + *(unsigned char *)s; *nump = (mksh_ari_t)wc; return (1);