* move the utf_* functions to a smaller file, to reduce the pain the
  CPU has to endure while gcc is crunching on edit.c
* comment on mksh not using _exactly_ OPTU-8/OPTU-16 (XXX)
This commit is contained in:
parent
a59d14b565
commit
7b7b75b026
229
edit.c
229
edit.c
@ -25,7 +25,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.182 2009/09/24 17:15:30 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.183 2009/09/26 04:01:31 tg Exp $");
|
||||
|
||||
/* tty driver characters we are interested in */
|
||||
typedef struct {
|
||||
@ -78,7 +78,6 @@ static int x_command_glob(int, const char *, int, char ***);
|
||||
static int x_locate_word(const char *, int, int, int *, bool *);
|
||||
|
||||
static int x_e_getmbc(char *);
|
||||
static int utf_wcwidth(unsigned int);
|
||||
|
||||
/* +++ generic editing functions +++ */
|
||||
|
||||
@ -732,232 +731,6 @@ x_escape(const char *s, size_t len, int (*putbuf_func)(const char *, size_t))
|
||||
return (rval);
|
||||
}
|
||||
|
||||
/* UTF-8 hack: high-level functions */
|
||||
|
||||
int
|
||||
utf_widthadj(const char *src, const char **dst)
|
||||
{
|
||||
size_t len;
|
||||
unsigned int wc;
|
||||
int width;
|
||||
|
||||
if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
|
||||
wc == 0)
|
||||
len = width = 1;
|
||||
else if ((width = utf_wcwidth(wc)) < 0)
|
||||
/* XXX use 2 for x_zotc3 here? */
|
||||
width = 1;
|
||||
|
||||
if (dst)
|
||||
*dst = src + len;
|
||||
return (width);
|
||||
}
|
||||
|
||||
int
|
||||
utf_mbswidth(const char *s)
|
||||
{
|
||||
size_t len;
|
||||
unsigned int wc;
|
||||
int width = 0, cw;
|
||||
|
||||
if (!UTFMODE)
|
||||
return (strlen(s));
|
||||
|
||||
while (*s)
|
||||
if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
|
||||
((cw = utf_wcwidth(wc)) == -1)) {
|
||||
s++;
|
||||
width += 1;
|
||||
} else {
|
||||
s += len;
|
||||
width += cw;
|
||||
}
|
||||
return (width);
|
||||
}
|
||||
|
||||
/*
 * Advance p character by character, using utf_widthadj(), until at
 * least cols display columns have been skipped; return the resulting
 * pointer.  A cols value of zero or less returns p unchanged.
 *
 * NOTE(review): the loop has no end-of-string check; utf_widthadj()
 * steps over a NUL as a one-column byte, so a cols value larger than
 * the width of the string walks past the terminator.  Callers
 * presumably bound cols themselves -- confirm before relying on it.
 */
const char *
utf_skipcols(const char *p, int cols)
{
	int skipped = 0;

	while (skipped < cols)
		skipped += utf_widthadj(p, &p);
	return (p);
}
|
||||
|
||||
size_t
|
||||
utf_ptradj(const char *src)
|
||||
{
|
||||
register size_t n;
|
||||
|
||||
if (!UTFMODE ||
|
||||
*(const unsigned char *)(src) < 0xC2 ||
|
||||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
|
||||
n = 1;
|
||||
return (n);
|
||||
}
|
||||
|
||||
/* UTF-8 hack: low-level functions */
|
||||
|
||||
/* --- begin of wcwidth.c excerpt --- */
|
||||
/*-
|
||||
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software
|
||||
* for any purpose and without fee is hereby granted. The author
|
||||
* disclaims all warranties with regard to this software.
|
||||
*/
|
||||
|
||||
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $");
|
||||
|
||||
/*
 * Return the number of display columns needed for the BMP character c:
 *	-1	for C0/C1 control characters and DEL (except NUL)
 *	 0	for NUL and for the non-spacing characters listed in comb[]
 *	 2	for the wide (East Asian) ranges tested at the end
 *	 1	for everything else
 */
static int
utf_wcwidth(unsigned int c)
{
	/* sorted, non-overlapping ranges of zero-width characters */
	static const struct cbset {
		unsigned short first;
		unsigned short last;
	} comb[] = {
		{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
		{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
		{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
		{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
		{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
		{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
		{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
		{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
		{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
		{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
		{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
		{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
		{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
		{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
		{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
		{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
		{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
		{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
		{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
		{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
		{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
		{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
		{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
		{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
		{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
		{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
		{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
		{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
		{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
		{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
		{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
		{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
		{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
		{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
		{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
		{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
		{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
		{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
		{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
		{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
		{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }
	};
	const size_t ncomb = sizeof(comb) / sizeof(comb[0]);
	size_t lo, mid, hi;

	/* test for 8-bit control characters */
	if (c < 32 || (c >= 0x7f && c < 0xa0))
		return (c ? -1 : 0);

	/*
	 * binary search in table of non-spacing characters; the bounds
	 * are half-open [lo, hi) so the unsigned indices can never wrap
	 */
	if (c >= comb[0].first && c <= comb[ncomb - 1].last) {
		lo = 0;
		hi = ncomb;
		while (lo < hi) {
			mid = lo + (hi - lo) / 2;
			if (c > comb[mid].last)
				lo = mid + 1;
			else if (c < comb[mid].first)
				hi = mid;
			else
				return (0);
		}
	}

	/* if we arrive here, c is not a combining or C0/C1 control char */
	return ((c >= 0x1100 && (
	    c <= 0x115f || /* Hangul Jamo init. consonants */
	    c == 0x2329 || c == 0x232a ||
	    (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */
	    (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
	    (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
	    (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */
	    (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
	    (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
	    (c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1);
}
|
||||
/* --- end of wcwidth.c excerpt --- */
|
||||
|
||||
/* +++ CESU-8 multibyte and wide character conversion crafted for mksh +++ */
|
||||
|
||||
/*
 * Decode one character of at most three bytes (BMP only) from src.
 *
 * On success the code point is stored in *dst (if dst is not NULL) and
 * the number of bytes consumed is returned; a NUL byte yields 0.
 * On an invalid sequence (size_t)-1 is returned and *dst is untouched.
 * Rejected are: stray continuation bytes, overlong two- and three-byte
 * forms, lead bytes above 0xEF and results above 0xFFFD.  Surrogate
 * values are not filtered out.
 */
size_t
utf_mbtowc(unsigned int *dst, const char *src)
{
	const unsigned char *s = (const unsigned char *)src;
	unsigned int c, wc;

	wc = *s++;
	if (wc >= 0x80) {
		/* < 0xC0: spurious second byte */
		/* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
		/* > 0xEF: beyond BMP */
		if (wc < 0xC2 || wc >= 0xF0)
			return ((size_t)-1);

		if (wc < 0xE0) {
			/* two-byte sequence */
			wc = (wc & 0x1F) << 6;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= c & 0x3F;
		} else {
			/* three-byte sequence */
			wc = (wc & 0x0F) << 12;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= (c & 0x3F) << 6;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= c & 0x3F;

			/* non-minimalistic mapping error in 3-byte seqs */
			if (wc < 0x0800 || wc > 0xFFFD)
				return ((size_t)-1);
		}
	}

	if (dst != NULL)
		*dst = wc;
	return (wc ? (size_t)((const char *)s - src) : 0);
}
|
||||
|
||||
/*
 * Encode wc into dst as one, two or three bytes and return the number
 * of bytes written (no terminating NUL is added).  Values above 0xFFFD
 * are replaced by 0xFFFD before encoding.  dst must have room for
 * three bytes.
 */
size_t
utf_wctomb(char *dst, unsigned int wc)
{
	unsigned char *d = (unsigned char *)dst;

	if (wc < 0x80) {
		*d = (unsigned char)wc;
		return (1);
	}

	if (wc < 0x0800)
		*d++ = (unsigned char)((wc >> 6) | 0xC0);
	else {
		/* clamp to the largest value this encoder emits */
		if (wc > 0xFFFD)
			wc = 0xFFFD;
		*d++ = (unsigned char)((wc >> 12) | 0xE0);
		*d++ = (unsigned char)(((wc >> 6) & 0x3F) | 0x80);
	}
	/* final continuation byte, common to both multibyte forms */
	*d++ = (unsigned char)((wc & 0x3F) | 0x80);
	return ((size_t)((char *)d - dst));
}
|
||||
|
||||
/* +++ emacs editing mode +++ */
|
||||
|
||||
|
235
expr.c
235
expr.c
@ -22,7 +22,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.35 2009/09/23 18:04:55 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.36 2009/09/26 04:01:32 tg Exp $");
|
||||
|
||||
/* The order of these enums is constrained by the order of opinfo[] */
|
||||
enum token {
|
||||
@ -656,3 +656,236 @@ intvar(Expr_state *es, struct tbl *vp)
|
||||
}
|
||||
return (vq);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UTF-8 support code: high-level functions
|
||||
*/
|
||||
|
||||
int
|
||||
utf_widthadj(const char *src, const char **dst)
|
||||
{
|
||||
size_t len;
|
||||
unsigned int wc;
|
||||
int width;
|
||||
|
||||
if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
|
||||
wc == 0)
|
||||
len = width = 1;
|
||||
else if ((width = utf_wcwidth(wc)) < 0)
|
||||
/* XXX use 2 for x_zotc3 here? */
|
||||
width = 1;
|
||||
|
||||
if (dst)
|
||||
*dst = src + len;
|
||||
return (width);
|
||||
}
|
||||
|
||||
int
|
||||
utf_mbswidth(const char *s)
|
||||
{
|
||||
size_t len;
|
||||
unsigned int wc;
|
||||
int width = 0, cw;
|
||||
|
||||
if (!UTFMODE)
|
||||
return (strlen(s));
|
||||
|
||||
while (*s)
|
||||
if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
|
||||
((cw = utf_wcwidth(wc)) == -1)) {
|
||||
s++;
|
||||
width += 1;
|
||||
} else {
|
||||
s += len;
|
||||
width += cw;
|
||||
}
|
||||
return (width);
|
||||
}
|
||||
|
||||
/*
 * Advance p character by character, using utf_widthadj(), until at
 * least cols display columns have been skipped; return the resulting
 * pointer.  A cols value of zero or less returns p unchanged.
 *
 * NOTE(review): the loop has no end-of-string check; utf_widthadj()
 * steps over a NUL as a one-column byte, so a cols value larger than
 * the width of the string walks past the terminator.  Callers
 * presumably bound cols themselves -- confirm before relying on it.
 */
const char *
utf_skipcols(const char *p, int cols)
{
	int skipped = 0;

	while (skipped < cols)
		skipped += utf_widthadj(p, &p);
	return (p);
}
|
||||
|
||||
size_t
|
||||
utf_ptradj(const char *src)
|
||||
{
|
||||
register size_t n;
|
||||
|
||||
if (!UTFMODE ||
|
||||
*(const unsigned char *)(src) < 0xC2 ||
|
||||
(n = utf_mbtowc(NULL, src)) == (size_t)-1)
|
||||
n = 1;
|
||||
return (n);
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-8 support code: low-level functions
|
||||
*/
|
||||
|
||||
/* CESU-8 multibyte and wide character conversion crafted for mksh */
|
||||
|
||||
/*
 * Decode one character of at most three bytes (BMP only) from src.
 *
 * On success the code point is stored in *dst (if dst is not NULL) and
 * the number of bytes consumed is returned; a NUL byte yields 0.
 * On an invalid sequence (size_t)-1 is returned and *dst is untouched.
 * Rejected are: stray continuation bytes, overlong two- and three-byte
 * forms, lead bytes above 0xEF and results above 0xFFFD.  Surrogate
 * values are not filtered out.
 */
size_t
utf_mbtowc(unsigned int *dst, const char *src)
{
	const unsigned char *s = (const unsigned char *)src;
	unsigned int c, wc;

	wc = *s++;
	if (wc >= 0x80) {
		/* < 0xC0: spurious second byte */
		/* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
		/* > 0xEF: beyond BMP */
		if (wc < 0xC2 || wc >= 0xF0)
			return ((size_t)-1);

		if (wc < 0xE0) {
			/* two-byte sequence */
			wc = (wc & 0x1F) << 6;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= c & 0x3F;
		} else {
			/* three-byte sequence */
			wc = (wc & 0x0F) << 12;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= (c & 0x3F) << 6;
			if (((c = *s++) & 0xC0) != 0x80)
				return ((size_t)-1);
			wc |= c & 0x3F;

			/* non-minimalistic mapping error in 3-byte seqs */
			if (wc < 0x0800 || wc > 0xFFFD)
				return ((size_t)-1);
		}
	}

	if (dst != NULL)
		*dst = wc;
	return (wc ? (size_t)((const char *)s - src) : 0);
}
|
||||
|
||||
/*
 * Encode wc into dst as one, two or three bytes and return the number
 * of bytes written (no terminating NUL is added).  Values above 0xFFFD
 * are replaced by 0xFFFD before encoding.  dst must have room for
 * three bytes.
 */
size_t
utf_wctomb(char *dst, unsigned int wc)
{
	unsigned char *d = (unsigned char *)dst;

	if (wc < 0x80) {
		*d = (unsigned char)wc;
		return (1);
	}

	if (wc < 0x0800)
		*d++ = (unsigned char)((wc >> 6) | 0xC0);
	else {
		/* clamp to the largest value this encoder emits */
		if (wc > 0xFFFD)
			wc = 0xFFFD;
		*d++ = (unsigned char)((wc >> 12) | 0xE0);
		*d++ = (unsigned char)(((wc >> 6) & 0x3F) | 0x80);
	}
	/* final continuation byte, common to both multibyte forms */
	*d++ = (unsigned char)((wc & 0x3F) | 0x80);
	return ((size_t)((char *)d - dst));
}
|
||||
|
||||
|
||||
/* --- begin of wcwidth.c excerpt --- */
|
||||
/*-
|
||||
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software
|
||||
* for any purpose and without fee is hereby granted. The author
|
||||
* disclaims all warranties with regard to this software.
|
||||
*/
|
||||
|
||||
__RCSID("$miros: src/lib/libc/i18n/wcwidth.c,v 1.8 2008/09/20 12:01:18 tg Exp $");
|
||||
|
||||
/*
 * Return the number of display columns needed for the BMP character c:
 *	-1	for C0/C1 control characters and DEL (except NUL)
 *	 0	for NUL and for the non-spacing characters listed in comb[]
 *	 2	for the wide (East Asian) ranges tested at the end
 *	 1	for everything else
 */
int
utf_wcwidth(unsigned int c)
{
	/* sorted, non-overlapping ranges of zero-width characters */
	static const struct cbset {
		unsigned short first;
		unsigned short last;
	} comb[] = {
		{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
		{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
		{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
		{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
		{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
		{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
		{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
		{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
		{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
		{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
		{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
		{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
		{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
		{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
		{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
		{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
		{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
		{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
		{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
		{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
		{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
		{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
		{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
		{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
		{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
		{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
		{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
		{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
		{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
		{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
		{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
		{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
		{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
		{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
		{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
		{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
		{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
		{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
		{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
		{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
		{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }
	};
	const size_t ncomb = sizeof(comb) / sizeof(comb[0]);
	size_t lo, mid, hi;

	/* test for 8-bit control characters */
	if (c < 32 || (c >= 0x7f && c < 0xa0))
		return (c ? -1 : 0);

	/*
	 * binary search in table of non-spacing characters; the bounds
	 * are half-open [lo, hi) so the unsigned indices can never wrap
	 */
	if (c >= comb[0].first && c <= comb[ncomb - 1].last) {
		lo = 0;
		hi = ncomb;
		while (lo < hi) {
			mid = lo + (hi - lo) / 2;
			if (c > comb[mid].last)
				lo = mid + 1;
			else if (c < comb[mid].first)
				hi = mid;
			else
				return (0);
		}
	}

	/* if we arrive here, c is not a combining or C0/C1 control char */
	return ((c >= 0x1100 && (
	    c <= 0x115f || /* Hangul Jamo init. consonants */
	    c == 0x2329 || c == 0x232a ||
	    (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */
	    (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
	    (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
	    (c >= 0xfe10 && c <= 0xfe19) || /* Vertical forms */
	    (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
	    (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
	    (c >= 0xffe0 && c <= 0xffe6))) ? 2 : 1);
}
|
||||
/* --- end of wcwidth.c excerpt --- */
|
||||
|
17
sh.h
17
sh.h
@ -134,7 +134,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.349 2009/09/26 03:40:01 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.350 2009/09/26 04:01:33 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R39 2009/09/25"
|
||||
|
||||
@ -1321,13 +1321,6 @@ void afree(void *, Area *); /* can take NULL */
|
||||
/* edit.c */
|
||||
void x_init(void);
|
||||
int x_read(char *, size_t);
|
||||
/* UTF-8 stuff */
|
||||
size_t utf_mbtowc(unsigned int *, const char *);
|
||||
size_t utf_wctomb(char *, unsigned int);
|
||||
int utf_widthadj(const char *, const char **);
|
||||
int utf_mbswidth(const char *);
|
||||
const char *utf_skipcols(const char *, int);
|
||||
size_t utf_ptradj(const char *);
|
||||
/* eval.c */
|
||||
char *substitute(const char *, int);
|
||||
char **eval(const char **, int);
|
||||
@ -1351,6 +1344,14 @@ int pr_list(char *const *);
|
||||
/* expr.c */
|
||||
int evaluate(const char *, mksh_ari_t *, int, bool);
|
||||
int v_evaluate(struct tbl *, const char *, volatile int, bool);
|
||||
/* UTF-8 stuff */
|
||||
size_t utf_mbtowc(unsigned int *, const char *);
|
||||
size_t utf_wctomb(char *, unsigned int);
|
||||
int utf_widthadj(const char *, const char **);
|
||||
int utf_mbswidth(const char *);
|
||||
const char *utf_skipcols(const char *, int);
|
||||
size_t utf_ptradj(const char *);
|
||||
int utf_wcwidth(unsigned int);
|
||||
/* funcs.c */
|
||||
int c_hash(const char **);
|
||||
int c_cd(const char **);
|
||||
|
9
var.c
9
var.c
@ -22,7 +22,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.92 2009/09/26 03:40:02 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/var.c,v 1.93 2009/09/26 04:01:34 tg Exp $");
|
||||
|
||||
/*
|
||||
* Variables
|
||||
@ -357,6 +357,7 @@ str_val(struct tbl *vp)
|
||||
*(s = strbuf) = '1';
|
||||
s[1] = '#';
|
||||
if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
|
||||
/* OPTU-16 -> raw octet */
|
||||
s[2] = n & 0xFF;
|
||||
else
|
||||
sz = utf_wctomb(s + 2, n);
|
||||
@ -511,6 +512,12 @@ getint(struct tbl *vp, mksh_ari_t *nump, bool arith)
|
||||
if (!UTFMODE)
|
||||
wc = *(unsigned char *)s;
|
||||
else if (utf_mbtowc(&wc, s) == (size_t)-1)
|
||||
/* OPTU-8 -> OPTU-16 */
|
||||
/*
|
||||
* (with a twist: 1#\uEF80 converts
|
||||
* the same as 1#\x80 does, thus is
|
||||
* not round-tripping correctly XXX)
|
||||
*/
|
||||
wc = 0xEF00 + *(unsigned char *)s;
|
||||
*nump = (mksh_ari_t)wc;
|
||||
return (1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user