diff --git a/edit.c b/edit.c index 747038e..dd36b5e 100644 --- a/edit.c +++ b/edit.c @@ -5,7 +5,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.38 2006/08/24 20:32:52 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.39 2006/11/05 15:31:36 tg Exp $"); /* tty driver characters we are interested in */ typedef struct { @@ -56,6 +56,25 @@ static int x_file_glob(int, const char *, int, char ***); static int x_command_glob(int, const char *, int, char ***); static int x_locate_word(const char *, int, int, int *, int *); +#if 0 +static void D(int); +static void +D(int c) +{ + static FILE *_Dfp = NULL; + + if (_Dfp == NULL) { + if ((_Dfp = fopen("/tmp/mksh.dbg", "ab+")) == NULL) + abort(); + fprintf(_Dfp, "\n\nOpening from %ld\n", (long)getpid()); + } + putc(c, _Dfp); + fflush(_Dfp); +} +#else +#define D(x) /* nothing */ +#endif + /* +++ generic editing functions +++ */ /* Called from main */ @@ -835,6 +854,236 @@ x_escape(const char *s, size_t len, int (*putbuf_func) (const char *, size_t)) return (rval); } +/* +++ UTF-8 hack +++ */ + +static size_t mbxtowc(unsigned *, const char *); +static size_t wcxtomb(char *, unsigned); +static int wcxwidth(unsigned); + +/* UTF-8 hack: high-level functions */ + +#define utf_backch(c) \ + (!Flag(FUTFHACK) ? 
(c) - 1 : __extension__({ \ + /*const*/ unsigned char *utf_backch_cp = (c); \ + --utf_backch_cp; \ + while ((*utf_backch_cp >= 0x80) && \ + (*utf_backch_cp < 0xC0)) \ + --utf_backch_cp; \ + (__typeof__ (c))utf_backch_cp; \ + })) + +int +utf_widthadj(const char *src, const char **dst) +{ + size_t len = (size_t)-1; + unsigned wc; + + if (Flag(FUTFHACK) && *(const unsigned char *)src > 0x7F) + len = mbxtowc(&wc, src); + + if (len == (size_t)-1) { + if (dst) + *dst = src + 1; + return (1); + } + + if (dst) + *dst = src + len; + return (wcxwidth(wc)); +} + +/* UTF-8 hack: low-level functions */ + +/* --- begin of wcwidth.c excerpt --- */ +/* + * Markus Kuhn -- 2003-05-20 (Unicode 4.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. + */ + +__RCSID("_MirOS: src/lib/libc/i18n/wcwidth.c,v 1.4 2006/11/01 20:01:20 tg Exp $"); + +struct wcxwidth_interval { + unsigned first; + unsigned last; +}; + +/* auxiliary function for binary search in interval table */ +static inline int wcxwidth_bisearch(unsigned, const struct wcxwidth_interval *, size_t); + +/* sorted list of non-overlapping intervals of non-spacing characters */ +/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ +static const struct wcxwidth_interval wcxwidth_combining[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, + { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, + { 0x0600, 0x0603 }, { 0x0610, 0x0615 }, { 0x064B, 0x065E }, + { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, + { 0x06EA, 0x06ED }, { 0x070F, 0x070F }, { 0x0711, 0x0711 }, + { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 
0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, + { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, + { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, + { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, + { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, + { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, + { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, + { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, + { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, + { 0x1160, 0x11FF }, { 0x135F, 0x135F }, { 0x1712, 0x1714 }, + { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, + { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, + { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D }, + { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, + { 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, + { 0x1DC0, 0x1DC3 }, { 0x200B, 0x200F }, { 0x202A, 0x202E }, + { 0x2060, 0x2063 }, { 0x206A, 0x206F }, { 0x20D0, 0x20EB }, + { 0x302A, 0x302F }, { 0x3099, 0x309A }, { 0xA806, 0xA806 }, + { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, + { 0xFFF9, 0xFFFB } +}; + +static int +wcxwidth_bisearch(unsigned ucs, const struct 
wcxwidth_interval *table, size_t max) +{ + size_t min = 0, mid; + + if (ucs >= table[0].first && ucs <= table[max].last) { + while (max >= min) { + mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return (1); + } + } + + return (0); +} + +int +wcxwidth(unsigned c) +{ + /* test for 8-bit control characters */ + if (c < 32 || (c >= 0x7f && c < 0xa0)) + return (c ? -1 : 0); + + /* binary search in table of non-spacing characters */ + if (wcxwidth_bisearch(c, wcxwidth_combining, + sizeof (wcxwidth_combining) / sizeof (struct wcxwidth_interval) - 1)) + return (0); + + /* if we arrive here, c is not a combining or C0/C1 control char */ + + return ((c >= 0x1100 && ( + c <= 0x115f || /* Hangul Jamo init. consonants */ + c == 0x2329 || c == 0x232a || + (c >= 0x2e80 && c <= 0xa4cf && c != 0x303f) || /* CJK ... Yi */ + (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */ + (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */ + (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */ + (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */ + (c >= 0xffe0 && c <= 0xffe6))) ? 
2 : 1); +} +/* --- end of wcwidth.c excerpt --- */ + +/* --- begin of mbrtowc.c excerpt --- */ +__RCSID("_MirOS: src/lib/libc/i18n/mbrtowc.c,v 1.13 2006/11/01 20:01:19 tg Exp $"); + +size_t +mbxtowc(unsigned *dst, const char *src) +{ + const unsigned char *s = (const unsigned char *)src; + unsigned c, wc; + unsigned count; + + wc = *s++; + if (wc < 0x80) { + count = 0; + } else if (wc < 0xC2) { + /* < 0xC0: spurious second byte */ + /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */ + goto ilseq; + } else if (wc < 0xE0) { + count = 1; /* one byte follows */ + wc = (wc & 0x1F) << 6; + } else if (wc < 0xF0) { + count = 2; /* two bytes follow */ + wc = (wc & 0x0F) << 12; + } else { + /* we don't support more than UCS-2 */ + goto ilseq; + } + + while (count) { + if (((c = *s++) & 0xC0) != 0x80) + goto ilseq; + c &= 0x3F; + wc |= c << (6 * --count); + + /* Check for non-minimalistic mapping error in 3-byte seqs */ + if (count && (wc < 0x0800)) + goto ilseq; + } + + if (wc > 0xFFFD) { + ilseq: + return ((size_t)(-1)); + } + + if (dst != NULL) + *dst = wc; + return (wc ? 
((const char *)s - src) : 0); +} +/* --- end of mbrtowc.c excerpt --- */ + +/* --- begin of wcrtomb.c excerpt --- */ +__RCSID("_MirOS: src/lib/libc/i18n/wcrtomb.c,v 1.14 2006/11/01 20:12:44 tg Exp $"); + +static size_t +wcxtomb(char *src, unsigned wc) +{ + unsigned char *s = (unsigned char *)src; + unsigned count; + + if (wc > 0xFFFD) + wc = 0xFFFD; + if (wc < 0x80) { + count = 0; + *s++ = wc; + } else if (wc < 0x0800) { + count = 1; + *s++ = (wc >> 6) | 0xC0; + } else { + count = 2; + *s++ = (wc >> 12) | 0xE0; + } + + while (__predict_false(count)) { + *s++ = ((wc >> (6 * --count)) & 0x3F) | 0x80; + } + return ((char *)s - src); +} +/* --- end of wcrtomb.c excerpt --- */ + /* +++ emacs editing mode +++ */ static Area aedit; @@ -942,11 +1191,12 @@ static void x_delete(int, int); static int x_bword(void); static int x_fword(void); static void x_goto(char *); -static void x_bs(int); +static void x_bs2(char *); static int x_size_str(char *); -static int x_size(int); -static void x_zots(u_char *); -static void x_zotc(int); +static int x_size2(char *, char **); +static void x_zots(char *); +static void x_zotc2(int); +static void x_zotc3(char **); static void x_load_hist(char **); static int x_search(char *, int, int); static int x_match(char *, char *); @@ -958,7 +1208,8 @@ static void x_print(int, int); static void x_adjust(void); static void x_e_ungetc(int); static int x_e_getc(void); -static void x_e_putc(int); +static void x_e_putc2(int); +static void x_e_putc3(const char **); static void x_e_puts(const char *); static int x_fold_case(int); static char *x_lastcp(void); @@ -1256,7 +1507,7 @@ x_emacs(char *buf, size_t len) if (x_displen < 1) { x_col = 0; x_displen = xx_cols - 2; - x_e_putc('\n'); + x_e_putc2('\n'); prompt_redraw = 0; } @@ -1315,7 +1566,7 @@ x_insert(int c) * Should allow tab and control chars. 
*/ if (c == 0) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } str[0] = c; @@ -1329,7 +1580,7 @@ static int x_ins_string(int c) { if (macroptr) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } macroptr = x_atab[c >> 8][c & CHARMASK]; @@ -1344,7 +1595,7 @@ static int x_do_ins(const char *cp, int len) { if (xep + len >= xend) { - x_e_putc(7); + x_e_putc2(7); return -1; } memmove(xcp + len, xcp, xep - xcp + 1); @@ -1357,7 +1608,7 @@ x_do_ins(const char *cp, int len) static int x_ins(char *s) { - u_char *cp = (u_char *)xcp; + char *cp = xcp; int adj = x_adj_done; if (x_do_ins(s, strlen(s)) < 0) @@ -1372,8 +1623,8 @@ x_ins(char *s) x_zots(cp); if (adj == x_adj_done) { /* has x_adjust() been called? */ /* no */ - for (cp = (u_char *)xlp; cp > (u_char *)xcp; ) - x_bs(*--cp); + for (cp = xlp; cp > xcp; ) + x_bs2(cp = utf_backch(cp)); } x_adj_ok = 1; return 0; @@ -1398,7 +1649,7 @@ x_del_back(int c __attribute__((unused))) int col = xcp - xbuf; if (col == 0) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (x_arg > col) @@ -1414,7 +1665,7 @@ x_del_char(int c __attribute__((unused))) int nleft = xep - xcp; if (!nleft) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (x_arg > nleft) @@ -1428,7 +1679,7 @@ static void x_delete(int nc, int push) { int i, j; - u_char *cp; + char *cp; if (nc == 0) return; @@ -1445,15 +1696,15 @@ x_delete(int nc, int push) x_push(nc); xep -= nc; - cp = (u_char *)xcp; + cp = xcp; j = 0; i = nc; while (i--) { - j += x_size(*(u_char *)cp++); + j += x_size2(cp, &cp); } memmove(xcp, xcp + nc, xep - xcp + 1); /* Copies the null */ x_adj_ok = 0; /* don't redraw */ - x_zots((u_char *)xcp); + x_zots(xcp); /* * if we are already filling the line, * there is no need to ' ','\b'. @@ -1463,16 +1714,16 @@ x_delete(int nc, int push) j = (j < i) ? 
j : i; i = j; while (i--) - x_e_putc(' '); + x_e_putc2(' '); i = j; while (i--) - x_e_putc('\b'); + x_e_putc2('\b'); } /*x_goto(xcp);*/ x_adj_ok = 1; xlp_valid = false; - for (cp = (u_char *)x_lastcp(); cp > (u_char *)xcp; ) - x_bs(*--cp); + for (cp = x_lastcp(); cp > xcp; ) + x_bs2(cp = utf_backch(cp)); return; } @@ -1512,7 +1763,7 @@ x_bword(void) char *cp = xcp; if (cp == xbuf) { - x_e_putc(7); + x_e_putc2(7); return 0; } while (x_arg--) { @@ -1536,7 +1787,7 @@ x_fword(void) char *cp = xcp; if (cp == xep) { - x_e_putc(7); + x_e_putc2(7); return 0; } while (x_arg--) { @@ -1555,45 +1806,51 @@ x_fword(void) static void x_goto(char *cp) { + D('A'); if (cp < xbp || cp >= (xbp + x_displen)) { /* we are heading off screen */ xcp = cp; x_adjust(); } else if (cp < xcp) { /* move back */ - u_char *uxcp = (u_char *)xcp; - while ((u_char *)cp < uxcp) - x_bs(*--uxcp); - xcp = (char *)uxcp; + while (cp < xcp) + x_bs2(xcp = utf_backch(xcp)); } else if (cp > xcp) { /* move forward */ - u_char *uxcp = (u_char *)xcp; - while ((u_char *)cp > uxcp) - x_zotc(*uxcp++); - xcp = (char *)uxcp; + while (cp > xcp) + x_zotc3(&xcp); } + D('B'); } static void -x_bs(int c) +x_bs2(char *cp) { int i; - i = x_size(c); + i = x_size2(cp, NULL); while (i--) - x_e_putc('\b'); + x_e_putc2('\b'); } static int x_size_str(char *cp) { int size = 0; + D('C'); while (*cp) - size += x_size(*(u_char *)cp++); + size += x_size2(cp, &cp); + D('D'); return size; } static int -x_size(int c) +x_size2(char *cp, char **dcp) { + int c = *(unsigned char *)cp; + + if (Flag(FUTFHACK) && (c > 0x7F)) + return (utf_widthadj(cp, (const char **)dcp)); + if (dcp) + *dcp = cp + 1; if (c == '\t') return 4; /* Kludge, tabs are always four spaces. 
*/ if (c < ' ' || c == 0x7f) @@ -1602,26 +1859,46 @@ x_size(int c) } static void -x_zots(u_char *str) +x_zots(char *str) { int adj = x_adj_done; + D('E'); x_lastcp(); - while (*str && str < (u_char *)xlp && adj == x_adj_done) - x_zotc(*str++); + D('F'); + while (*str && str < xlp && adj == x_adj_done) + x_zotc3(&str); + D('G'); } static void -x_zotc(int c) +x_zotc2(int c) { if (c == '\t') { /* Kludge, tabs are always four spaces. */ x_e_puts(" "); } else if (c < ' ' || c == 0x7f) { - x_e_putc('^'); - x_e_putc(UNCTRL(c)); + x_e_putc2('^'); + x_e_putc2(UNCTRL(c)); } else - x_e_putc(c); + x_e_putc2(c); +} + +static void +x_zotc3(char **cp) +{ + int c = **(unsigned char **)cp; + + if (c == '\t') { + /* Kludge, tabs are always four spaces. */ + x_e_puts(" "); + (*cp)++; + } else if (c < ' ' || c == 0x7f) { + x_e_putc2('^'); + x_e_putc2(UNCTRL(c)); + (*cp)++; + } else + x_e_putc3((const char **)cp); } static int @@ -1630,7 +1907,7 @@ x_mv_back(int c __attribute__((unused))) int col = xcp - xbuf; if (col == 0) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (x_arg > col) @@ -1645,7 +1922,7 @@ x_mv_forw(int c __attribute__((unused))) int nleft = xep - xcp; if (!nleft) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (x_arg > nleft) @@ -1665,7 +1942,7 @@ x_search_char_forw(int c) if (c < 0 || ((cp = (cp == xep) ? 
NULL : strchr(cp + 1, c)) == NULL && (cp = strchr(xbuf, c)) == NULL)) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } } @@ -1684,7 +1961,7 @@ x_search_char_back(int c) if (p-- == xbuf) p = xep; if (c < 0 || p == cp) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (*p == c) @@ -1697,8 +1974,8 @@ x_search_char_back(int c) static int x_newline(int c __attribute__((unused))) { - x_e_putc('\r'); - x_e_putc('\n'); + x_e_putc2('\r'); + x_e_putc2('\n'); x_flush(); *xep++ = '\n'; return KEOL; @@ -1707,7 +1984,7 @@ x_newline(int c __attribute__((unused))) static int x_end_of_text(int c __attribute__((unused))) { - x_zotc(edchars.eof); + x_zotc2(edchars.eof); x_putc('\r'); x_putc('\n'); x_flush(); @@ -1762,7 +2039,7 @@ x_load_hist(char **hp) int oldsize; if (hp < history || hp > histptr) { - x_e_putc(7); + x_e_putc2(7); return; } x_histp = hp; @@ -1836,7 +2113,7 @@ x_search_hist(int c) /* add char to pattern */ /* overflow check... */ if (p >= &pat[sizeof(pat) - 1]) { - x_e_putc(7); + x_e_putc2(7); continue; } *p++ = c, *p = '\0'; @@ -1871,13 +2148,13 @@ x_search(char *pat, int sameline, int offset) i = x_match(*hp, pat); if (i >= 0) { if (offset < 0) - x_e_putc('\n'); + x_e_putc2('\n'); x_load_hist(hp); x_goto(xbuf + i + strlen(pat) - (*pat == '^')); return i; } } - x_e_putc(7); + x_e_putc2(7); x_histp = histptr; return -1; } @@ -1942,13 +2219,13 @@ static void x_redraw(int limit) { int i, j, x_trunc = 0; - u_char *cp; + char *cp; x_adj_ok = 0; if (limit == -1) - x_e_putc('\n'); + x_e_putc2('\n'); else - x_e_putc('\r'); + x_e_putc2('\r'); x_flush(); if (xbp == xbuf) { x_col = promptlen(prompt); @@ -1965,8 +2242,8 @@ x_redraw(int limit) x_displen = xx_cols - 2; } xlp_valid = false; - cp = (u_char *)x_lastcp(); - x_zots((u_char *)xbp); + cp = x_lastcp(); + x_zots(xbp); if (xbp != xbuf || xep > xlp) limit = xx_cols; if (limit >= 0) { @@ -1976,7 +2253,7 @@ x_redraw(int limit) i = limit - (xlp - xbp); for (j = 0; j < i && x_col < (xx_cols - 2); j++) - x_e_putc(' '); + 
x_e_putc2(' '); i = ' '; if (xep > xlp) { /* more off screen */ if (xbp > xbuf) @@ -1985,13 +2262,13 @@ x_redraw(int limit) i = '>'; } else if (xbp > xbuf) i = '<'; - x_e_putc(i); + x_e_putc2(i); j++; while (j--) - x_e_putc('\b'); + x_e_putc2('\b'); } - for (cp = (u_char *)xlp; cp > (u_char *)xcp; ) - x_bs(*--cp); + for (cp = xlp; cp > xcp; ) + x_bs2(cp = utf_backch(cp)); x_adj_ok = 1; return; } @@ -1999,7 +2276,7 @@ x_redraw(int limit) static int x_transpose(int c __attribute__((unused))) { - char tmp; + unsigned tmpa, tmpb; /* What transpose is meant to do seems to be up for debate. This * is a general summary of the options; the text is abcd with the @@ -2015,35 +2292,47 @@ x_transpose(int c __attribute__((unused))) * to the one they want. */ if (xcp == xbuf) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } else if (xcp == xep || Flag(FGMACS)) { if (xcp - xbuf == 1) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } /* Gosling/Unipress emacs style: Swap two characters before the * cursor, do not change cursor position */ - x_bs(((u_char *)xcp)[-1]); - x_bs(((u_char *)xcp)[-2]); - x_zotc(((u_char *)xcp)[-1]); - x_zotc(((u_char *)xcp)[-2]); - tmp = xcp[-1]; - xcp[-1] = xcp[-2]; - xcp[-2] = tmp; + x_bs2(xcp = utf_backch(xcp)); + if (mbxtowc(&tmpa, xcp) == (size_t)-1) { + x_e_putc2(7); + return KSTD; + } + x_bs2(xcp = utf_backch(xcp)); + if (mbxtowc(&tmpb, xcp) == (size_t)-1) { + x_e_putc2(7); + return KSTD; + } + wcxtomb(xcp, tmpa); + x_zotc3(&xcp); + wcxtomb(xcp, tmpb); + x_zotc3(&xcp); } else { /* GNU emacs style: Swap the characters before and under the * cursor, move cursor position along one. 
*/ - x_bs(((u_char *)xcp)[-1]); - x_zotc(((u_char *)xcp)[0]); - x_zotc(((u_char *)xcp)[-1]); - tmp = xcp[-1]; - xcp[-1] = xcp[0]; - xcp[0] = tmp; - x_bs(((u_char *)xcp)[0]); - x_goto(xcp + 1); + if (mbxtowc(&tmpa, xcp) == (size_t)-1) { + x_e_putc2(7); + return KSTD; + } + x_bs2(xcp = utf_backch(xcp)); + if (mbxtowc(&tmpb, xcp) == (size_t)-1) { + x_e_putc2(7); + return KSTD; + } + wcxtomb(xcp, tmpa); + x_zotc3(&xcp); + wcxtomb(xcp, tmpb); + x_zotc3(&xcp); } return KSTD; } @@ -2154,7 +2443,7 @@ x_abort(int c __attribute__((unused))) static int x_error(int c __attribute__((unused))) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } @@ -2374,7 +2663,7 @@ x_kill_region(int c __attribute__((unused))) char *xr; if (xmp == NULL) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } if (xmp > xcp) { @@ -2396,7 +2685,7 @@ x_xchg_point_mark(int c __attribute__((unused))) char *tmp; if (xmp == NULL) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } tmp = xmp; @@ -2476,7 +2765,7 @@ x_expand(int c __attribute__((unused))) &start, &end, &words, &is_command); if (nwords == 0) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } x_goto(xbuf + start); @@ -2484,7 +2773,7 @@ x_expand(int c __attribute__((unused))) for (i = 0; i < nwords;) { if (x_escape(words[i], strlen(words[i]), x_emacs_putbuf) < 0 || (++i < nwords && x_ins(space) < 0)) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } } @@ -2508,7 +2797,7 @@ do_complete(int flags, /* XCF_{COMMAND,FILE,COMMAND_FILE} */ &start, &end, &words, &is_command); /* no match */ if (nwords == 0) { - x_e_putc(7); + x_e_putc2(7); return; } if (type == CT_LIST) { @@ -2596,12 +2885,28 @@ x_e_getc(void) } static void -x_e_putc(int c) +x_e_putc2(int c) { + int width = 1; + if (c == '\r' || c == '\n') x_col = 0; if (x_col < xx_cols) { - x_putc(c); + if (Flag(FUTFHACK) && (c > 0x7F)) { + char utf_tmp[3]; + size_t x; + + if (c < 0xA0) + c = 0xFFFD; + x = wcxtomb(utf_tmp, c); + x_putc(utf_tmp[0]); + if (x > 1) + x_putc(utf_tmp[1]); + if (x > 2) + x_putc(utf_tmp[2]); + 
width = wcxwidth(c); + } else + x_putc(c); switch (c) { case 7: break; @@ -2612,7 +2917,43 @@ x_e_putc(int c) x_col--; break; default: - x_col++; + x_col += width; + break; + } + } + if (x_adj_ok && (x_col < 0 || x_col >= (xx_cols - 2))) + x_adjust(); +} + +static void +x_e_putc3(const char **cp) +{ + int width = 1, c = **(const unsigned char **)cp; + + if (c == '\r' || c == '\n') + x_col = 0; + if (x_col < xx_cols) { + if (Flag(FUTFHACK) && (c > 0x7F)) { + char *cp2; + + width = utf_widthadj(*cp, (const char **)&cp2); + while (*cp < cp2) + x_putc(*(*cp)++); + } else { + (*cp)++; + x_putc(c); + } + switch (c) { + case 7: + break; + case '\r': + case '\n': + break; + case '\b': + x_col--; + break; + default: + x_col += width; break; } } @@ -2626,7 +2967,7 @@ x_e_puts(const char *s) int adj = x_adj_done; while (*s && adj == x_adj_done) - x_e_putc(*s++); + x_e_putc3(&s); } /* NAME: @@ -2648,7 +2989,7 @@ x_set_arg(int c) for (; c >= 0 && isdigit((unsigned char)c); c = x_e_getc(), first = 0) n = n * 10 + (c - '0'); if (c < 0 || first) { - x_e_putc(7); + x_e_putc2(7); x_arg = 1; x_arg_defaulted = 1; } else { @@ -2668,7 +3009,7 @@ x_comment(int c __attribute__((unused))) int ret = x_do_comment(xbuf, xend - xbuf, &len); if (ret < 0) - x_e_putc(7); + x_e_putc2(7); else { xep = xbuf + len; *xep = '\0'; @@ -2734,7 +3075,7 @@ x_prev_histword(int c __attribute__((unused))) cp = *histptr; if (!cp) - x_e_putc(7); + x_e_putc2(7); else if (x_arg_defaulted) { rcp = &cp[strlen(cp) - 1]; /* @@ -2810,7 +3151,7 @@ x_fold_case(int c) char *cp = xcp; if (cp == xep) { - x_e_putc(7); + x_e_putc2(7); return KSTD; } while (x_arg--) { @@ -2862,8 +3203,8 @@ x_fold_case(int c) * edit buffer that will be the last displayed on the * screen. The sequence: * - * for (cp = x_lastcp(); cp > xcp; cp) - * x_bs(*--cp); + * for (cp = x_lastcp(); cp > xcp; ) + * x_bs2(cp = utf_backch(cp)); * * Will position the cursor correctly on the screen. 
* @@ -2873,13 +3214,18 @@ x_fold_case(int c) static char * x_lastcp(void) { - char *rcp; - int i; - if (!xlp_valid) { - for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++) - i += x_size(*(u_char *)rcp); - xlp = rcp; + int i = 0, j; + char *xlp2; + + xlp = xbp; + while (xlp < xep) { + j = x_size2(xlp, &xlp2); + if ((i + j) >= x_displen) + break; + i += j; + xlp = xlp2; + } } xlp_valid = true; return (xlp); diff --git a/lex.c b/lex.c index fc8bbb1..217f4be 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.21 2006/08/02 11:33:37 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.22 2006/11/05 15:31:36 tg Exp $"); /* Structure to keep track of the lexing state and the various pieces of info * needed for each particular state. */ @@ -1116,7 +1116,15 @@ dopprompt(const char *cp, int ntruncate, int doprint) columns--; } else if (*cp == delimiter) indelimit = !indelimit; - else + else if (Flag(FUTFHACK) && ((unsigned)*cp > 0x7F)) { + const char *cp2; + columns += utf_widthadj(cp, &cp2); + if (doprint && (indelimit || + (ntruncate < (x_cols * lines + columns)))) + shf_write(cp, cp2 - cp, shl_out); + cp = cp2 - /* loop increment */ 1; + continue; + } else columns++; if (doprint && (*cp != delimiter) && (indelimit || (ntruncate < (x_cols * lines + columns)))) diff --git a/sh.h b/sh.h index 07575b9..705a18a 100644 --- a/sh.h +++ b/sh.h @@ -8,7 +8,7 @@ /* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */ -#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.48 2006/11/05 12:11:14 tg Exp $" +#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.49 2006/11/05 15:31:37 tg Exp $" #define MKSH_VERSION "R28 2006/11/05" #include @@ -1053,6 +1053,9 @@ void afree(void *, Area *); void x_init(void); int x_read(char *, size_t); int x_bind(const char *, const char *, int, int); +/* UTF-8 hack stuff */ +int utf_widthadj(const char *, const char **); +#define 
utf_width(x) utf_widthadj(x, NULL) /* eval.c */ char *substitute(const char *, int); char **eval(char **, int);