• re-implement wcswidth-like behaviour for ${%foo} even in !UTFMODE

• merge the rest of branch tg-wcswidth-behaviour
• enhance test cases for wcswidth-like behaviour
• switch hash table collision resolution algorithm to Python’s as announced
• bump vsn
This commit is contained in:
tg 2010-01-01 17:44:10 +00:00
parent c413edb34a
commit 6115f5a91c
5 changed files with 100 additions and 35 deletions

29
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.350 2009/12/31 14:05:42 tg Exp $
# $MirOS: src/bin/mksh/check.t,v 1.351 2010/01/01 17:44:06 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout:
@(#)MIRBSD KSH R39 2009/12/31
@(#)MIRBSD KSH R39 2010/01/01
description:
Check version of shell.
stdin:
@ -6709,22 +6709,43 @@ description:
stdin:
s=何
set +U
print octets: ${#s} ${%s} .
print octets: ${#s} .
print 8-bit width: ${%s} .
set -U
print characters: ${#s} .
print columns: ${%s} .
s=<EFBFBD>
set +U
print octets: ${#s} .
print 8-bit width: ${%s} .
set -U
print characters: ${#s} .
print columns: ${%s} .
expected-stdout:
octets: 3 3 .
octets: 3 .
8-bit width: -1 .
characters: 1 .
columns: 2 .
octets: 3 .
8-bit width: 3 .
characters: 1 .
columns: 1 .
---
name: wcswidth-2
description:
Check some corner cases
stdin:
print % $% .
set -U
x='a b'
print c ${%x} .
set +U
x='a b'
print d ${%x} .
expected-stdout:
% $% .
c -1 .
d -1 .
---
name: wcswidth-3
description:

32
eval.c
View File

@ -22,7 +22,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.79 2009/12/05 22:24:35 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.80 2010/01/01 17:44:07 tg Exp $");
/*
* string expansion
@ -941,12 +941,38 @@ varsub(Expand *xp, const char *sp, const char *word,
} else {
p = str_val(global(sp));
zero_ok = p != null;
c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p);
if (stype == '#')
c = utflen(p);
else {
/* partial utf_mbswidth reimplementation */
const char *s = p;
unsigned int wc;
size_t len;
int cw;
c = 0;
while (*s) {
if (!UTFMODE || (len = utf_mbtowc(&wc,
s)) == (size_t)-1)
/* not UTFMODE or not UTF-8 */
wc = (unsigned char)(*s++);
else
/* UTFMODE and UTF-8 */
s += len;
/* wc == char or wchar at s++ */
if ((cw = utf_wcwidth(wc)) == -1) {
/* 646, 8859-1, 10646 C0/C1 */
c = -1;
break;
}
c += cw;
}
}
}
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
errorf("%s: parameter not set", sp);
*stypep = 0; /* unqualified variable/string substitution */
xp->str = shf_smprintf("%u", (unsigned int)c);
xp->str = shf_smprintf("%d", c);
return (XSUB);
}

61
main.c
View File

@ -33,7 +33,7 @@
#include <locale.h>
#endif
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.157 2009/12/05 17:43:47 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.158 2010/01/01 17:44:08 tg Exp $");
extern char **environ;
@ -1277,7 +1277,13 @@ maketemp(Area *ap, Temp_type type, struct temp **tlist)
return (tp);
}
/*
* We use a collision resolution algorithm similar to that of Python 2.5.4,
* but with a slightly tweaked implementation written from scratch.
*/
#define INIT_TBLS 8 /* initial table size (power of 2) */
#define PERTURB_SHIFT 5 /* see Python 2.5.4 Objects/dictobject.c */
static void texpand(struct table *, size_t);
static int tnamecmp(const void *, const void *);
@ -1307,8 +1313,8 @@ oaathash_full(register const uint8_t *bp)
static void
texpand(struct table *tp, size_t nsize)
{
size_t i, osize = tp->size;
struct tbl *tblp, **p;
size_t i, j, osize = tp->size, perturb;
struct tbl *tblp, **pp;
struct tbl **ntblp, **otblp = tp->tbls;
ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap);
@ -1319,14 +1325,22 @@ texpand(struct table *tp, size_t nsize)
tp->tbls = ntblp;
if (otblp == NULL)
return;
nsize--; /* from here on nsize := mask */
for (i = 0; i < osize; i++)
if ((tblp = otblp[i]) != NULL) {
if ((tblp->flag & DEFINED)) {
for (p = &ntblp[tblp->ua.hval &
(tp->size - 1)]; *p != NULL; p--)
if (p == ntblp) /* wrap */
p += tp->size;
*p = tblp;
/* search for free hash table slot */
j = (perturb = tblp->ua.hval) & nsize;
goto find_first_empty_slot;
find_next_empty_slot:
j = (j << 2) + j + perturb + 1;
perturb >>= PERTURB_SHIFT;
find_first_empty_slot:
pp = &ntblp[j & nsize];
if (*pp != NULL)
goto find_next_empty_slot;
/* found an empty hash table slot */
*pp = tblp;
tp->nfree--;
} else if (!(tblp->flag & FINUSE)) {
afree(tblp, tp->areap);
@ -1345,25 +1359,26 @@ ktinit(struct table *tp, Area *ap, size_t tsize)
texpand(tp, tsize);
}
/* table, name (key) to search for, hash(n) */
/* table, name (key) to search for, hash(name), rv pointer to tbl ptr */
static struct tbl *
ktscan(struct table *tp, const char *n, uint32_t h, struct tbl ***ppp)
ktscan(struct table *tp, const char *name, uint32_t h, struct tbl ***ppp)
{
size_t j, perturb, mask;
struct tbl **pp, *p;
/* search for name in hashed table */
for (pp = &tp->tbls[h & (tp->size - 1)]; (p = *pp) != NULL; pp--) {
if (p->ua.hval == h && !strcmp(p->name, n) &&
(p->flag & DEFINED))
goto found;
if (pp == tp->tbls)
/* wrap */
pp += tp->size;
}
/* not found */
p = NULL;
found:
mask = tp->size - 1;
/* search for hash table slot matching name */
j = (perturb = h) & mask;
goto find_first_slot;
find_next_slot:
j = (j << 2) + j + perturb + 1;
perturb >>= PERTURB_SHIFT;
find_first_slot:
pp = &tp->tbls[j & mask];
if ((p = *pp) != NULL && (p->ua.hval != h || !(p->flag & DEFINED) ||
strcmp(p->name, name)))
goto find_next_slot;
/* p == NULL if not found, correct found entry otherwise */
if (ppp)
*ppp = pp;
return (p);

9
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.202 2009/12/01 19:15:33 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.203 2010/01/01 17:44:08 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -71,7 +71,7 @@
.\" with -mandoc, it might implement .Mx itself, but we want to
.\" use our own definition. And .Dd must come *first*, always.
.\"
.Dd $Mdocdate: December 1 2009 $
.Dd $Mdocdate: January 1 2010 $
.\"
.\" Check which macro package we use
.\"
@ -1452,7 +1452,10 @@ The number of elements in the array
The width
.Pq in screen columns
of the string value of parameter
.Ar name .
.Ar name ,
or -1 if
.Pf ${ Ns Ar name Ns }
contains a control character.
.Pp
.It Pf ${! Ns Ar name Ns }
The name of the variable referred to by

4
sh.h
View File

@ -148,9 +148,9 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.371 2009/12/31 14:05:44 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.372 2010/01/01 17:44:10 tg Exp $");
#endif
#define MKSH_VERSION "R39 2009/12/31"
#define MKSH_VERSION "R39 2010/01/01"
#ifndef MKSH_INCLUDES_ONLY