From 883d9d99b33e1d21db38ad60427a12bef5d79531 Mon Sep 17 00:00:00 2001 From: tg Date: Sat, 28 Nov 2009 14:21:47 +0000 Subject: [PATCH] =?UTF-8?q?switch=20${%foo}=20to=20wcswidth-like=20behavio?= =?UTF-8?q?ur=20=E2=80=93=20slightly=20problematic,=20and=20the=20?= =?UTF-8?q?=E2=80=9Cset=20+U=E2=80=9D=20case=20isn=E2=80=99t=20even=20hand?= =?UTF-8?q?led?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit committed to branch because I’d like to get more input on this, for now --- check.t | 6 +++++- eval.c | 7 ++++--- exec.c | 6 +++--- expr.c | 19 +++++++++++++++---- funcs.c | 4 ++-- misc.c | 4 ++-- sh.h | 4 ++-- shf.c | 4 ++-- var.c | 4 ++-- 9 files changed, 37 insertions(+), 21 deletions(-) diff --git a/check.t b/check.t index bef6c7f..215b502 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.343 2009/11/22 14:25:17 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.344 2009/11/28 14:21:41 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -6699,8 +6699,12 @@ description: Check some corner cases stdin: print % $% . + set -U + x='a b' + print c ${%x} . expected-stdout: % $% . + c -1 . --- name: wcswidth-3 description: diff --git a/eval.c b/eval.c index ab07181..db6349b 100644 --- a/eval.c +++ b/eval.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.74 2009/11/22 13:49:04 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.75 2009/11/28 14:21:43 tg Exp $"); /* * string expansion @@ -949,12 +949,13 @@ varsub(Expand *xp, const char *sp, const char *word, } else { p = str_val(global(sp)); zero_ok = p != null; - c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p); + c = stype == '#' ? (int)utflen(p) : + utf_mbswidth(p, false); } if (Flag(FNOUNSET) && c == 0 && !zero_ok) errorf("%s: parameter not set", sp); *stypep = 0; /* unqualified variable/string substitution */ - xp->str = shf_smprintf("%u", (unsigned int)c); + xp->str = shf_smprintf("%d", c); return (XSUB); } diff --git a/exec.c b/exec.c index 073e837..88ef275 100644 --- a/exec.c +++ b/exec.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.68 2009/11/09 23:35:09 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.69 2009/11/28 14:21:43 tg Exp $"); static int comexec(struct op *, struct tbl *volatile, const char **, int volatile, volatile int *); @@ -1380,7 +1380,7 @@ pr_menu(const char * const *ap) i = strlen(*pp); if (i > aocts) aocts = i; - i = utf_mbswidth(*pp); + i = utf_mbswidth(*pp, true); if (i > acols) acols = i; } @@ -1420,7 +1420,7 @@ pr_list(char * const *ap) i = strlen(*pp); if (i > aocts) aocts = i; - i = utf_mbswidth(*pp); + i = utf_mbswidth(*pp, true); if (i > acols) acols = i; } diff --git a/expr.c b/expr.c index 88b1637..7e35ca5 100644 --- a/expr.c +++ b/expr.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.37 2009/10/04 13:19:33 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.38 2009/11/28 14:21:44 tg Exp $"); /* The order of these enums is constrained by the order of opinfo[] */ enum token { @@ -683,8 +683,15 @@ utf_widthadj(const char *src, const char **dst) return (width); } +/** + * In lenient mode, characters of width -1 are handled as one column + * per octet (kind of as a strlen replacement). Users of lenient mo- + * de should reconsider the code. + * In strict mode, this behaves like wcswidth(3) and returns -1 upon + * encounter of a control multibyte character. + */ int -utf_mbswidth(const char *s) +utf_mbswidth(const char *s, bool lenient) { size_t len; unsigned int wc; @@ -694,10 +701,14 @@ utf_mbswidth(const char *s) return (strlen(s)); while (*s) - if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) || - ((cw = utf_wcwidth(wc)) == -1)) { + if ((len = utf_mbtowc(&wc, s)) == (size_t)-1) { + by_octet: s++; width += 1; + } else if ((cw = utf_wcwidth(wc)) == -1) { + if (lenient) + goto by_octet; + return (-1); } else { s += len; width += cw; diff --git a/funcs.c b/funcs.c index 2a8b786..e3c92b2 100644 --- a/funcs.c +++ b/funcs.c @@ -25,7 +25,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.142 2009/11/09 23:35:09 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.143 2009/11/28 14:21:44 tg Exp $"); #if HAVE_KILLPG /* @@ -1551,7 +1551,7 @@ c_kill(const char **wp) w = strlen(sigtraps[j].mess); if (w > mess_octs) mess_octs = w; - w = utf_mbswidth(sigtraps[j].mess); + w = utf_mbswidth(sigtraps[j].mess, true); if (w > mess_cols) mess_cols = w; } diff --git a/misc.c b/misc.c index b1f347a..91d3645 100644 --- a/misc.c +++ b/misc.c @@ -29,7 +29,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.131 2009/11/23 12:48:19 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.132 2009/11/28 14:21:45 tg Exp $"); unsigned char chtypes[UCHAR_MAX + 1]; /* type bits for unsigned char */ @@ -154,7 +154,7 @@ printoptions(bool verbose) len = strlen(options[i].name); if (len > octs) octs = len; - len = utf_mbswidth(options[i].name); + len = utf_mbswidth(options[i].name, true); if (len > oi.opt_width) oi.opt_width = len; } diff --git a/sh.h b/sh.h index f77c80c..c06c974 100644 --- a/sh.h +++ b/sh.h @@ -134,7 +134,7 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.362 2009/11/22 14:25:18 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.363 2009/11/28 14:21:45 tg Exp $"); #endif #define MKSH_VERSION "R39 2009/11/22" @@ -1361,7 +1361,7 @@ int v_evaluate(struct tbl *, const char *, volatile int, bool); size_t utf_mbtowc(unsigned int *, const char *); size_t utf_wctomb(char *, unsigned int); int utf_widthadj(const char *, const char **); -int utf_mbswidth(const char *); +int utf_mbswidth(const char *, bool); const char *utf_skipcols(const char *, int); size_t utf_ptradj(const char *); int utf_wcwidth(unsigned int); diff --git a/shf.c b/shf.c index dee58e9..6d9c016 100644 --- a/shf.c +++ b/shf.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.33 2009/09/20 13:33:48 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.34 2009/11/28 14:21:46 tg Exp $"); /* flags to shf_emptybuf() */ #define EB_READSW 0x01 /* about to switch to reading */ @@ -932,7 +932,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args) case 's': if (!(s = va_arg(args, const char *))) s = "(null)"; - len = utf_mbswidth(s); + len = utf_mbswidth(s, true); break; case 'c': diff --git a/var.c b/var.c index ba41b1b..3654e29 100644 --- a/var.c +++ b/var.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/var.c,v 1.96 2009/10/18 12:30:06 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/var.c,v 1.97 2009/11/28 14:21:47 tg Exp $"); /* * Variables @@ -574,7 +574,7 @@ formatstr(struct tbl *vp, const char *s) char *p, *q; size_t psiz; - olen = utf_mbswidth(s); + olen = utf_mbswidth(s, true); if (vp->flag & (RJUST|LJUST)) { if (!vp->u2.field) /* default field width */