• re-implement wcswidth-like behaviour for ${%foo} even in !UTFMODE

• merge the rest of branch tg-wcswidth-behaviour
• enhance test cases for wcswidth-like behaviour
• switch hash table collision resolution algorithm to Python’s as announced
• bump vsn
This commit is contained in:
tg 2010-01-01 17:44:10 +00:00
parent c413edb34a
commit 6115f5a91c
5 changed files with 100 additions and 35 deletions

29
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.350 2009/12/31 14:05:42 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.351 2010/01/01 17:44:06 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh # http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R39 2009/12/31 @(#)MIRBSD KSH R39 2010/01/01
description: description:
Check version of shell. Check version of shell.
stdin: stdin:
@ -6709,22 +6709,43 @@ description:
stdin: stdin:
s= s=
set +U set +U
print octets: ${#s} ${%s} . print octets: ${#s} .
print 8-bit width: ${%s} .
set -U
print characters: ${#s} .
print columns: ${%s} .
s=<EFBFBD>
set +U
print octets: ${#s} .
print 8-bit width: ${%s} .
set -U set -U
print characters: ${#s} . print characters: ${#s} .
print columns: ${%s} . print columns: ${%s} .
expected-stdout: expected-stdout:
octets: 3 3 . octets: 3 .
8-bit width: -1 .
characters: 1 . characters: 1 .
columns: 2 . columns: 2 .
octets: 3 .
8-bit width: 3 .
characters: 1 .
columns: 1 .
--- ---
name: wcswidth-2 name: wcswidth-2
description: description:
Check some corner cases Check some corner cases
stdin: stdin:
print % $% . print % $% .
set -U
x='a b'
print c ${%x} .
set +U
x='a b'
print d ${%x} .
expected-stdout: expected-stdout:
% $% . % $% .
c -1 .
d -1 .
--- ---
name: wcswidth-3 name: wcswidth-3
description: description:

32
eval.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.79 2009/12/05 22:24:35 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/eval.c,v 1.80 2010/01/01 17:44:07 tg Exp $");
/* /*
* string expansion * string expansion
@ -941,12 +941,38 @@ varsub(Expand *xp, const char *sp, const char *word,
} else { } else {
p = str_val(global(sp)); p = str_val(global(sp));
zero_ok = p != null; zero_ok = p != null;
c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p); if (stype == '#')
c = utflen(p);
else {
/* partial utf_mbswidth reimplementation */
const char *s = p;
unsigned int wc;
size_t len;
int cw;
c = 0;
while (*s) {
if (!UTFMODE || (len = utf_mbtowc(&wc,
s)) == (size_t)-1)
/* not UTFMODE or not UTF-8 */
wc = (unsigned char)(*s++);
else
/* UTFMODE and UTF-8 */
s += len;
/* wc == char or wchar at s++ */
if ((cw = utf_wcwidth(wc)) == -1) {
/* 646, 8859-1, 10646 C0/C1 */
c = -1;
break;
}
c += cw;
}
}
} }
if (Flag(FNOUNSET) && c == 0 && !zero_ok) if (Flag(FNOUNSET) && c == 0 && !zero_ok)
errorf("%s: parameter not set", sp); errorf("%s: parameter not set", sp);
*stypep = 0; /* unqualified variable/string substitution */ *stypep = 0; /* unqualified variable/string substitution */
xp->str = shf_smprintf("%u", (unsigned int)c); xp->str = shf_smprintf("%d", c);
return (XSUB); return (XSUB);
} }

61
main.c
View File

@ -33,7 +33,7 @@
#include <locale.h> #include <locale.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.157 2009/12/05 17:43:47 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/main.c,v 1.158 2010/01/01 17:44:08 tg Exp $");
extern char **environ; extern char **environ;
@ -1277,7 +1277,13 @@ maketemp(Area *ap, Temp_type type, struct temp **tlist)
return (tp); return (tp);
} }
/*
* We use a collision resolution algorithm similar to that of Python 2.5.4,
* but with a slightly tweaked implementation written from scratch.
*/
#define INIT_TBLS 8 /* initial table size (power of 2) */ #define INIT_TBLS 8 /* initial table size (power of 2) */
#define PERTURB_SHIFT 5 /* see Python 2.5.4 Objects/dictobject.c */
static void texpand(struct table *, size_t); static void texpand(struct table *, size_t);
static int tnamecmp(const void *, const void *); static int tnamecmp(const void *, const void *);
@ -1307,8 +1313,8 @@ oaathash_full(register const uint8_t *bp)
static void static void
texpand(struct table *tp, size_t nsize) texpand(struct table *tp, size_t nsize)
{ {
size_t i, osize = tp->size; size_t i, j, osize = tp->size, perturb;
struct tbl *tblp, **p; struct tbl *tblp, **pp;
struct tbl **ntblp, **otblp = tp->tbls; struct tbl **ntblp, **otblp = tp->tbls;
ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap); ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap);
@ -1319,14 +1325,22 @@ texpand(struct table *tp, size_t nsize)
tp->tbls = ntblp; tp->tbls = ntblp;
if (otblp == NULL) if (otblp == NULL)
return; return;
nsize--; /* from here on nsize := mask */
for (i = 0; i < osize; i++) for (i = 0; i < osize; i++)
if ((tblp = otblp[i]) != NULL) { if ((tblp = otblp[i]) != NULL) {
if ((tblp->flag & DEFINED)) { if ((tblp->flag & DEFINED)) {
for (p = &ntblp[tblp->ua.hval & /* search for free hash table slot */
(tp->size - 1)]; *p != NULL; p--) j = (perturb = tblp->ua.hval) & nsize;
if (p == ntblp) /* wrap */ goto find_first_empty_slot;
p += tp->size; find_next_empty_slot:
*p = tblp; j = (j << 2) + j + perturb + 1;
perturb >>= PERTURB_SHIFT;
find_first_empty_slot:
pp = &ntblp[j & nsize];
if (*pp != NULL)
goto find_next_empty_slot;
/* found an empty hash table slot */
*pp = tblp;
tp->nfree--; tp->nfree--;
} else if (!(tblp->flag & FINUSE)) { } else if (!(tblp->flag & FINUSE)) {
afree(tblp, tp->areap); afree(tblp, tp->areap);
@ -1345,25 +1359,26 @@ ktinit(struct table *tp, Area *ap, size_t tsize)
texpand(tp, tsize); texpand(tp, tsize);
} }
/* table, name (key) to search for, hash(n) */ /* table, name (key) to search for, hash(name), rv pointer to tbl ptr */
static struct tbl * static struct tbl *
ktscan(struct table *tp, const char *n, uint32_t h, struct tbl ***ppp) ktscan(struct table *tp, const char *name, uint32_t h, struct tbl ***ppp)
{ {
size_t j, perturb, mask;
struct tbl **pp, *p; struct tbl **pp, *p;
/* search for name in hashed table */ mask = tp->size - 1;
for (pp = &tp->tbls[h & (tp->size - 1)]; (p = *pp) != NULL; pp--) { /* search for hash table slot matching name */
if (p->ua.hval == h && !strcmp(p->name, n) && j = (perturb = h) & mask;
(p->flag & DEFINED)) goto find_first_slot;
goto found; find_next_slot:
if (pp == tp->tbls) j = (j << 2) + j + perturb + 1;
/* wrap */ perturb >>= PERTURB_SHIFT;
pp += tp->size; find_first_slot:
} pp = &tp->tbls[j & mask];
/* not found */ if ((p = *pp) != NULL && (p->ua.hval != h || !(p->flag & DEFINED) ||
p = NULL; strcmp(p->name, name)))
goto find_next_slot;
found: /* p == NULL if not found, correct found entry otherwise */
if (ppp) if (ppp)
*ppp = pp; *ppp = pp;
return (p); return (p);

9
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.202 2009/12/01 19:15:33 tg Exp $ .\" $MirOS: src/bin/mksh/mksh.1,v 1.203 2010/01/01 17:44:08 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $ .\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"- .\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -71,7 +71,7 @@
.\" with -mandoc, it might implement .Mx itself, but we want to .\" with -mandoc, it might implement .Mx itself, but we want to
.\" use our own definition. And .Dd must come *first*, always. .\" use our own definition. And .Dd must come *first*, always.
.\" .\"
.Dd $Mdocdate: December 1 2009 $ .Dd $Mdocdate: January 1 2010 $
.\" .\"
.\" Check which macro package we use .\" Check which macro package we use
.\" .\"
@ -1452,7 +1452,10 @@ The number of elements in the array
The width The width
.Pq in screen columns .Pq in screen columns
of the string value of parameter of the string value of parameter
.Ar name . .Ar name ,
or -1 if
.Pf ${ Ns Ar name Ns }
contains a control character.
.Pp .Pp
.It Pf ${! Ns Ar name Ns } .It Pf ${! Ns Ar name Ns }
The name of the variable referred to by The name of the variable referred to by

4
sh.h
View File

@ -148,9 +148,9 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.371 2009/12/31 14:05:44 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.372 2010/01/01 17:44:10 tg Exp $");
#endif #endif
#define MKSH_VERSION "R39 2009/12/31" #define MKSH_VERSION "R39 2010/01/01"
#ifndef MKSH_INCLUDES_ONLY #ifndef MKSH_INCLUDES_ONLY