From e20694ecebbc2701d53f904ae0d4d9d228d99788 Mon Sep 17 00:00:00 2001 From: tg Date: Sun, 20 Apr 2008 00:03:50 +0000 Subject: [PATCH] utf-8 function overhaul (size optimisation) XXX maybe we can get more out of this? --- edit.c | 40 +++++++++++++--------------------------- sh.h | 4 ++-- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/edit.c b/edit.c index 42f6955..d3392cd 100644 --- a/edit.c +++ b/edit.c @@ -5,7 +5,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.123 2008/04/19 22:50:01 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.124 2008/04/20 00:03:50 tg Exp $"); /* tty driver characters we are interested in */ typedef struct { @@ -59,7 +59,7 @@ static int x_command_glob(int, const char *, int, char ***); static int x_locate_word(const char *, int, int, int *, bool *); static int x_e_getmbc(char *); -static char *utf_getcpfromcols(char *, int); +static inline char *utf_getcpfromcols(char *, int); /* +++ generic editing functions +++ */ @@ -803,46 +803,32 @@ utf_mbswidth(const char *s) return (width); } -size_t +void utf_cptradj(const char *src, const char **dst) { size_t len; - if (!Flag(FUTFHACK) || *(const unsigned char *)src < 0xC2) + if (!Flag(FUTFHACK) || *(const unsigned char *)src < 0xC2 || + (len = utf_mbtowc(NULL, src)) == (size_t)-1) len = 1; - else if (*(const unsigned char *)src < 0xE0) - len = 2; - else if (*(const unsigned char *)src < 0xF0) - len = 3; - else - len = 1; - - if (len > 1) - if ((*(const unsigned char *)(src + 1) & 0xC0) != 0x80) - len = 1; - if (len > 2) - if ((*(const unsigned char *)(src + 2) & 0xC0) != 0x80) - len = 1; if (dst) *dst = src + len; - return (len); + /* return (len); */ } #if HAVE_EXPSTMT -#define utf_ptradj(s,d) ({ \ - union mksh_cchack out; \ - char **dst = (d); \ - size_t rv; \ - \ - rv = utf_cptradj((s), &out.ro); \ - if (dst) \ - *dst = out.rw; \ +#define utf_ptradj(s,d) ({ \ + union mksh_cchack utf_ptradj_o; \ + char **utf_ptradj_d = (d); \ + \ + utf_cptradj((s), &utf_ptradj_o.ro); \ + *utf_ptradj_d = utf_ptradj_o.rw; \ }) #else #define utf_ptradj(s,d) utf_cptradj((s), (const char **)(d)) #endif -static char * +static inline char * utf_getcpfromcols(char *p, int cols) { int c = 0; diff --git a/sh.h b/sh.h index 9d7744d..f6aab52 100644 --- a/sh.h +++ b/sh.h @@ -8,7 +8,7 @@ /* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */ -#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.207 2008/04/19 22:15:05 tg Exp $" +#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.208 2008/04/20 00:03:50 tg Exp $" #define MKSH_VERSION "R33 2008/04/19" #if HAVE_SYS_PARAM_H @@ -1221,7 +1221,7 @@ int x_bind(const char *, const char *, int, int); /* UTF-8 hack stuff */ size_t utf_mbtowc(unsigned int *, const char *); size_t utf_wctomb(char *, unsigned int); -size_t utf_cptradj(const char *, const char **); +void utf_cptradj(const char *, const char **); int utf_widthadj(const char *, const char **); int utf_mbswidth(const char *); int utf_wcwidth(unsigned int);