* move the utf_* functions to a smaller file, to reduce the pain the

CPU has to endure while gcc is crunching on edit.c
* comment on mksh not using _exactly_ OPTU-8/OPTU-16 (XXX)
This commit is contained in:
tg 2009-09-26 04:01:34 +00:00
parent a59d14b565
commit 7b7b75b026
4 changed files with 252 additions and 238 deletions

229
edit.c
View File

@ -25,7 +25,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.182 2009/09/24 17:15:30 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.183 2009/09/26 04:01:31 tg Exp $");
/* tty driver characters we are interested in */
typedef struct {
@ -78,7 +78,6 @@ static int x_command_glob(int, const char *, int, char ***);
static int x_locate_word(const char *, int, int, int *, bool *);
static int x_e_getmbc(char *);
static int utf_wcwidth(unsigned int);
/* +++ generic editing functions +++ */
@ -732,232 +731,6 @@ x_escape(const char *s, size_t len, int (*putbuf_func)(const char *, size_t))
return (rval);
}
/* UTF-8 hack: high-level functions */
int
utf_widthadj(const char *src, const char **dst)
{
size_t len;
unsigned int wc;
int width;
if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
wc == 0)
len = width = 1;
else if ((width = utf_wcwidth(wc)) < 0)
/* XXX use 2 for x_zotc3 here? */
width = 1;
if (dst)
*dst = src + len;
return (width);
}
int
utf_mbswidth(const char *s)
{
size_t len;
unsigned int wc;
int width = 0, cw;
if (!UTFMODE)
return (strlen(s));
while (*s)
if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
((cw = utf_wcwidth(wc)) == -1)) {
s++;
width += 1;
} else {
s += len;
width += cw;
}
return (width);
}
const char *
utf_skipcols(const char *p, int cols)
{
int c = 0;
while (c < cols)
c += utf_widthadj(p, &p);
return (p);
}
size_t
utf_ptradj(const char *src)
{
register size_t n;
if (!UTFMODE ||
*(const unsigned char *)(src) < 0xC2 ||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
n = 1;
return (n);
}
/* UTF-8 hack: low-level functions */
/* --- begin of wcwidth.c excerpt --- */
/*-
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
*
* Permission to use, copy, modify, and distribute this software
* for any purpose and without fee is hereby granted. The author
* disclaims all warranties with regard to this software.
*/
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $");
static int
utf_wcwidth(unsigned int c)
{
static const struct cbset {
unsigned short first;
unsigned short last;
} comb[] = {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }
};
size_t min = 0, mid, max = NELEM(comb) - 1;
/* test for 8-bit control characters */
if (c < 32 || (c >= 0x7f && c < 0xa0))
return (c ? -1 : 0);
/* binary search in table of non-spacing characters */
if (c >= comb[0].first && c <= comb[max].last)
while (max >= min) {
mid = (min + max) / 2;
if (c > comb[mid].last)
min = mid + 1;
else if (c < comb[mid].first)
max = mid - 1;
else
return (0);
}
/* if we arrive here, c is not a combining or C0/C1 control char */
return ((c >= 0x1100 && (
c <= 0x115f || /* Hangul Jamo init. consonants */
c == 0x2329 || c == 0x232a ||
(c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */
(c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
(c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
(c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */
(c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
(c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
(c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1);
}
/* --- end of wcwidth.c excerpt --- */
/* +++ CESU-8 multibyte and wide character conversion crafted for mksh +++ */
size_t
utf_mbtowc(unsigned int *dst, const char *src)
{
const unsigned char *s = (const unsigned char *)src;
unsigned int c, wc;
if ((wc = *s++) < 0x80) {
out:
if (dst != NULL)
*dst = wc;
return (wc ? ((const char *)s - src) : 0);
}
if (wc < 0xC2 || wc >= 0xF0)
/* < 0xC0: spurious second byte */
/* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
/* > 0xEF: beyond BMP */
goto ilseq;
if (wc < 0xE0) {
wc = (wc & 0x1F) << 6;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= c & 0x3F;
goto out;
}
wc = (wc & 0x0F) << 12;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= (c & 0x3F) << 6;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= c & 0x3F;
/* Check for non-minimalistic mapping error in 3-byte seqs */
if (wc >= 0x0800 && wc <= 0xFFFD)
goto out;
ilseq:
return ((size_t)(-1));
}
size_t
utf_wctomb(char *dst, unsigned int wc)
{
unsigned char *d;
if (wc < 0x80) {
*dst = wc;
return (1);
}
d = (unsigned char *)dst;
if (wc < 0x0800)
*d++ = (wc >> 6) | 0xC0;
else {
*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
*d++ = ((wc >> 6) & 0x3F) | 0x80;
}
*d++ = (wc & 0x3F) | 0x80;
return ((char *)d - dst);
}
/* +++ emacs editing mode +++ */

235
expr.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.35 2009/09/23 18:04:55 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.36 2009/09/26 04:01:32 tg Exp $");
/* The order of these enums is constrained by the order of opinfo[] */
enum token {
@ -656,3 +656,236 @@ intvar(Expr_state *es, struct tbl *vp)
}
return (vq);
}
/*
* UTF-8 support code: high-level functions
*/
int
utf_widthadj(const char *src, const char **dst)
{
size_t len;
unsigned int wc;
int width;
if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
wc == 0)
len = width = 1;
else if ((width = utf_wcwidth(wc)) < 0)
/* XXX use 2 for x_zotc3 here? */
width = 1;
if (dst)
*dst = src + len;
return (width);
}
int
utf_mbswidth(const char *s)
{
size_t len;
unsigned int wc;
int width = 0, cw;
if (!UTFMODE)
return (strlen(s));
while (*s)
if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
((cw = utf_wcwidth(wc)) == -1)) {
s++;
width += 1;
} else {
s += len;
width += cw;
}
return (width);
}
const char *
utf_skipcols(const char *p, int cols)
{
int c = 0;
while (c < cols)
c += utf_widthadj(p, &p);
return (p);
}
size_t
utf_ptradj(const char *src)
{
register size_t n;
if (!UTFMODE ||
*(const unsigned char *)(src) < 0xC2 ||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
n = 1;
return (n);
}
/*
* UTF-8 support code: low-level functions
*/
/* CESU-8 multibyte and wide character conversion crafted for mksh */
size_t
utf_mbtowc(unsigned int *dst, const char *src)
{
const unsigned char *s = (const unsigned char *)src;
unsigned int c, wc;
if ((wc = *s++) < 0x80) {
out:
if (dst != NULL)
*dst = wc;
return (wc ? ((const char *)s - src) : 0);
}
if (wc < 0xC2 || wc >= 0xF0)
/* < 0xC0: spurious second byte */
/* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
/* > 0xEF: beyond BMP */
goto ilseq;
if (wc < 0xE0) {
wc = (wc & 0x1F) << 6;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= c & 0x3F;
goto out;
}
wc = (wc & 0x0F) << 12;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= (c & 0x3F) << 6;
if (((c = *s++) & 0xC0) != 0x80)
goto ilseq;
wc |= c & 0x3F;
/* Check for non-minimalistic mapping error in 3-byte seqs */
if (wc >= 0x0800 && wc <= 0xFFFD)
goto out;
ilseq:
return ((size_t)(-1));
}
size_t
utf_wctomb(char *dst, unsigned int wc)
{
unsigned char *d;
if (wc < 0x80) {
*dst = wc;
return (1);
}
d = (unsigned char *)dst;
if (wc < 0x0800)
*d++ = (wc >> 6) | 0xC0;
else {
*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
*d++ = ((wc >> 6) & 0x3F) | 0x80;
}
*d++ = (wc & 0x3F) | 0x80;
return ((char *)d - dst);
}
/* --- begin of wcwidth.c excerpt --- */
/*-
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
*
* Permission to use, copy, modify, and distribute this software
* for any purpose and without fee is hereby granted. The author
* disclaims all warranties with regard to this software.
*/
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $");
int
utf_wcwidth(unsigned int c)
{
static const struct cbset {
unsigned short first;
unsigned short last;
} const comb[] = {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }
};
size_t min = 0, mid, max = NELEM(comb) - 1;
/* test for 8-bit control characters */
if (c < 32 || (c >= 0x7f && c < 0xa0))
return (c ? -1 : 0);
/* binary search in table of non-spacing characters */
if (c >= comb[0].first && c <= comb[max].last)
while (max >= min) {
mid = (min + max) / 2;
if (c > comb[mid].last)
min = mid + 1;
else if (c < comb[mid].first)
max = mid - 1;
else
return (0);
}
/* if we arrive here, c is not a combining or C0/C1 control char */
return ((c >= 0x1100 && (
c <= 0x115f || /* Hangul Jamo init. consonants */
c == 0x2329 || c == 0x232a ||
(c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */
(c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
(c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
(c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */
(c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
(c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
(c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1);
}
/* --- end of wcwidth.c excerpt --- */

17
sh.h
View File

@ -134,7 +134,7 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.349 2009/09/26 03:40:01 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.350 2009/09/26 04:01:33 tg Exp $");
#endif
#define MKSH_VERSION "R39 2009/09/25"
@ -1321,13 +1321,6 @@ void afree(void *, Area *); /* can take NULL */
/* edit.c */
void x_init(void);
int x_read(char *, size_t);
/* UTF-8 stuff */
size_t utf_mbtowc(unsigned int *, const char *);
size_t utf_wctomb(char *, unsigned int);
int utf_widthadj(const char *, const char **);
int utf_mbswidth(const char *);
const char *utf_skipcols(const char *, int);
size_t utf_ptradj(const char *);
/* eval.c */
char *substitute(const char *, int);
char **eval(const char **, int);
@ -1351,6 +1344,14 @@ int pr_list(char *const *);
/* expr.c */
int evaluate(const char *, mksh_ari_t *, int, bool);
int v_evaluate(struct tbl *, const char *, volatile int, bool);
/* UTF-8 stuff */
size_t utf_mbtowc(unsigned int *, const char *);
size_t utf_wctomb(char *, unsigned int);
int utf_widthadj(const char *, const char **);
int utf_mbswidth(const char *);
const char *utf_skipcols(const char *, int);
size_t utf_ptradj(const char *);
int utf_wcwidth(unsigned int);
/* funcs.c */
int c_hash(const char **);
int c_cd(const char **);

9
var.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.92 2009/09/26 03:40:02 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.93 2009/09/26 04:01:34 tg Exp $");
/*
* Variables
@ -357,6 +357,7 @@ str_val(struct tbl *vp)
*(s = strbuf) = '1';
s[1] = '#';
if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
/* OPTU-16 -> raw octet */
s[2] = n & 0xFF;
else
sz = utf_wctomb(s + 2, n);
@ -511,6 +512,12 @@ getint(struct tbl *vp, mksh_ari_t *nump, bool arith)
if (!UTFMODE)
wc = *(unsigned char *)s;
else if (utf_mbtowc(&wc, s) == (size_t)-1)
/* OPTU-8 -> OPTU-16 */
/*
* (with a twist: 1#\uEF80 converts
* the same as 1#\x80 does, thus is
* not round-tripping correctly XXX)
*/
wc = 0xEF00 + *(unsigned char *)s;
*nump = (mksh_ari_t)wc;
return (1);