diff --git a/check.t b/check.t index 6855cf9..fa20ed0 100644 --- a/check.t +++ b/check.t @@ -1,4 +1,4 @@ -# $MirOS: src/bin/mksh/check.t,v 1.350 2009/12/31 14:05:42 tg Exp $ +# $MirOS: src/bin/mksh/check.t,v 1.351 2010/01/01 17:44:06 tg Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ @@ -25,7 +25,7 @@ # http://www.research.att.com/~gsf/public/ifs.sh expected-stdout: - @(#)MIRBSD KSH R39 2009/12/31 + @(#)MIRBSD KSH R39 2010/01/01 description: Check version of shell. stdin: @@ -6709,22 +6709,43 @@ description: stdin: s=何 set +U - print octets: ${#s} ${%s} . + print octets: ${#s} . + print 8-bit width: ${%s} . + set -U + print characters: ${#s} . + print columns: ${%s} . + s=� + set +U + print octets: ${#s} . + print 8-bit width: ${%s} . set -U print characters: ${#s} . print columns: ${%s} . expected-stdout: - octets: 3 3 . + octets: 3 . + 8-bit width: -1 . characters: 1 . columns: 2 . + octets: 3 . + 8-bit width: 3 . + characters: 1 . + columns: 1 . --- name: wcswidth-2 description: Check some corner cases stdin: print % $% . + set -U + x='a b' + print c ${%x} . + set +U + x='a b' + print d ${%x} . expected-stdout: % $% . + c -1 . + d -1 . --- name: wcswidth-3 description: diff --git a/eval.c b/eval.c index b9d8dd5..c61622b 100644 --- a/eval.c +++ b/eval.c @@ -22,7 +22,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.79 2009/12/05 22:24:35 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.80 2010/01/01 17:44:07 tg Exp $"); /* * string expansion @@ -941,12 +941,38 @@ varsub(Expand *xp, const char *sp, const char *word, } else { p = str_val(global(sp)); zero_ok = p != null; - c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p); + if (stype == '#') + c = utflen(p); + else { + /* partial utf_mbswidth reimplementation */ + const char *s = p; + unsigned int wc; + size_t len; + int cw; + + c = 0; + while (*s) { + if (!UTFMODE || (len = utf_mbtowc(&wc, + s)) == (size_t)-1) + /* not UTFMODE or not UTF-8 */ + wc = (unsigned char)(*s++); + else + /* UTFMODE and UTF-8 */ + s += len; + /* wc == char or wchar at s++ */ + if ((cw = utf_wcwidth(wc)) == -1) { + /* 646, 8859-1, 10646 C0/C1 */ + c = -1; + break; + } + c += cw; + } + } } if (Flag(FNOUNSET) && c == 0 && !zero_ok) errorf("%s: parameter not set", sp); *stypep = 0; /* unqualified variable/string substitution */ - xp->str = shf_smprintf("%u", (unsigned int)c); + xp->str = shf_smprintf("%d", c); return (XSUB); } diff --git a/main.c b/main.c index 37fc3aa..2edcd6b 100644 --- a/main.c +++ b/main.c @@ -33,7 +33,7 @@ #include <locale.h> #endif -__RCSID("$MirOS: src/bin/mksh/main.c,v 1.157 2009/12/05 17:43:47 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/main.c,v 1.158 2010/01/01 17:44:08 tg Exp $"); extern char **environ; @@ -1277,7 +1277,13 @@ maketemp(Area *ap, Temp_type type, struct temp **tlist) return (tp); } +/* + * We use a similar collision resolution algorithm as Python 2.5.4 + * but with a slightly tweaked implementation written from scratch. + */ + #define INIT_TBLS 8 /* initial table size (power of 2) */ +#define PERTURB_SHIFT 5 /* see Python 2.5.4 Objects/dictobject.c */ static void texpand(struct table *, size_t); static int tnamecmp(const void *, const void *); @@ -1307,8 +1313,8 @@ oaathash_full(register const uint8_t *bp) static void texpand(struct table *tp, size_t nsize) { - size_t i, osize = tp->size; - struct tbl *tblp, **p; + size_t i, j, osize = tp->size, perturb; + struct tbl *tblp, **pp; struct tbl **ntblp, **otblp = tp->tbls; ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap); @@ -1319,14 +1325,22 @@ texpand(struct table *tp, size_t nsize) tp->tbls = ntblp; if (otblp == NULL) return; + nsize--; /* from here on nsize := mask */ for (i = 0; i < osize; i++) if ((tblp = otblp[i]) != NULL) { if ((tblp->flag & DEFINED)) { - for (p = &ntblp[tblp->ua.hval & - (tp->size - 1)]; *p != NULL; p--) - if (p == ntblp) /* wrap */ - p += tp->size; - *p = tblp; + /* search for free hash table slot */ + j = (perturb = tblp->ua.hval) & nsize; + goto find_first_empty_slot; + find_next_empty_slot: + j = (j << 2) + j + perturb + 1; + perturb >>= PERTURB_SHIFT; + find_first_empty_slot: + pp = &ntblp[j & nsize]; + if (*pp != NULL) + goto find_next_empty_slot; + /* found an empty hash table slot */ + *pp = tblp; tp->nfree--; } else if (!(tblp->flag & FINUSE)) { afree(tblp, tp->areap); @@ -1345,25 +1359,26 @@ ktinit(struct table *tp, Area *ap, size_t tsize) texpand(tp, tsize); } -/* table, name (key) to search for, hash(n) */ +/* table, name (key) to search for, hash(name), rv pointer to tbl ptr */ static struct tbl * -ktscan(struct table *tp, const char *n, uint32_t h, struct tbl ***ppp) +ktscan(struct table *tp, const char *name, uint32_t h, struct tbl ***ppp) { + size_t j, perturb, mask; struct tbl **pp, *p; - /* search for name in hashed table */ - for (pp = &tp->tbls[h & (tp->size - 1)]; (p = *pp) != NULL; pp--) { - if (p->ua.hval == h && !strcmp(p->name, n) && - (p->flag & DEFINED)) - goto found; - if (pp == tp->tbls) - /* wrap */ - pp += tp->size; - } - /* not found */ - p = NULL; - - found: + mask = tp->size - 1; + /* search for hash table slot matching name */ + j = (perturb = h) & mask; + goto find_first_slot; + find_next_slot: + j = (j << 2) + j + perturb + 1; + perturb >>= PERTURB_SHIFT; + find_first_slot: + pp = &tp->tbls[j & mask]; + if ((p = *pp) != NULL && (p->ua.hval != h || !(p->flag & DEFINED) || + strcmp(p->name, name))) + goto find_next_slot; + /* p == NULL if not found, correct found entry otherwise */ if (ppp) *ppp = pp; return (p); diff --git a/mksh.1 b/mksh.1 index 4dd619e..b0ede6f 100644 --- a/mksh.1 +++ b/mksh.1 @@ -1,4 +1,4 @@ -.\" $MirOS: src/bin/mksh/mksh.1,v 1.202 2009/12/01 19:15:33 tg Exp $ +.\" $MirOS: src/bin/mksh/mksh.1,v 1.203 2010/01/01 17:44:08 tg Exp $ .\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $ .\"- .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 @@ -71,7 +71,7 @@ .\" with -mandoc, it might implement .Mx itself, but we want to .\" use our own definition. And .Dd must come *first*, always. .\" -.Dd $Mdocdate: December 1 2009 $ +.Dd $Mdocdate: January 1 2010 $ .\" .\" Check which macro package we use .\" @@ -1452,7 +1452,10 @@ The number of elements in the array The width .Pq in screen columns of the string value of parameter -.Ar name . +.Ar name , +or -1 if +.Pf ${ Ns Ar name Ns } +contains a control character. .Pp .It Pf ${! Ns Ar name Ns } The name of the variable referred to by diff --git a/sh.h b/sh.h index 611982c..5235b4d 100644 --- a/sh.h +++ b/sh.h @@ -148,9 +148,9 @@ #endif #ifdef EXTERN -__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.371 2009/12/31 14:05:44 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.372 2010/01/01 17:44:10 tg Exp $"); #endif -#define MKSH_VERSION "R39 2009/12/31" +#define MKSH_VERSION "R39 2010/01/01" #ifndef MKSH_INCLUDES_ONLY