• re-implement wcswidth-like behaviour for ${%foo} even in !UTFMODE
• merge the rest of branch tg-wcswidth-behaviour • enhance test cases for wcswidth-like behaviour • switch hash table collision resolution algorithm to Python’s as announced • bump vsn
This commit is contained in:
parent
c413edb34a
commit
6115f5a91c
29
check.t
29
check.t
@ -1,4 +1,4 @@
|
|||||||
# $MirOS: src/bin/mksh/check.t,v 1.350 2009/12/31 14:05:42 tg Exp $
|
# $MirOS: src/bin/mksh/check.t,v 1.351 2010/01/01 17:44:06 tg Exp $
|
||||||
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
||||||
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
||||||
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
||||||
@ -25,7 +25,7 @@
|
|||||||
# http://www.research.att.com/~gsf/public/ifs.sh
|
# http://www.research.att.com/~gsf/public/ifs.sh
|
||||||
|
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
@(#)MIRBSD KSH R39 2009/12/31
|
@(#)MIRBSD KSH R39 2010/01/01
|
||||||
description:
|
description:
|
||||||
Check version of shell.
|
Check version of shell.
|
||||||
stdin:
|
stdin:
|
||||||
@ -6709,22 +6709,43 @@ description:
|
|||||||
stdin:
|
stdin:
|
||||||
s=何
|
s=何
|
||||||
set +U
|
set +U
|
||||||
print octets: ${#s} ${%s} .
|
print octets: ${#s} .
|
||||||
|
print 8-bit width: ${%s} .
|
||||||
|
set -U
|
||||||
|
print characters: ${#s} .
|
||||||
|
print columns: ${%s} .
|
||||||
|
s=�
|
||||||
|
set +U
|
||||||
|
print octets: ${#s} .
|
||||||
|
print 8-bit width: ${%s} .
|
||||||
set -U
|
set -U
|
||||||
print characters: ${#s} .
|
print characters: ${#s} .
|
||||||
print columns: ${%s} .
|
print columns: ${%s} .
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
octets: 3 3 .
|
octets: 3 .
|
||||||
|
8-bit width: -1 .
|
||||||
characters: 1 .
|
characters: 1 .
|
||||||
columns: 2 .
|
columns: 2 .
|
||||||
|
octets: 3 .
|
||||||
|
8-bit width: 3 .
|
||||||
|
characters: 1 .
|
||||||
|
columns: 1 .
|
||||||
---
|
---
|
||||||
name: wcswidth-2
|
name: wcswidth-2
|
||||||
description:
|
description:
|
||||||
Check some corner cases
|
Check some corner cases
|
||||||
stdin:
|
stdin:
|
||||||
print % $% .
|
print % $% .
|
||||||
|
set -U
|
||||||
|
x='a b'
|
||||||
|
print c ${%x} .
|
||||||
|
set +U
|
||||||
|
x='a b'
|
||||||
|
print d ${%x} .
|
||||||
expected-stdout:
|
expected-stdout:
|
||||||
% $% .
|
% $% .
|
||||||
|
c -1 .
|
||||||
|
d -1 .
|
||||||
---
|
---
|
||||||
name: wcswidth-3
|
name: wcswidth-3
|
||||||
description:
|
description:
|
||||||
|
32
eval.c
32
eval.c
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
#include "sh.h"
|
#include "sh.h"
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.79 2009/12/05 22:24:35 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.80 2010/01/01 17:44:07 tg Exp $");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* string expansion
|
* string expansion
|
||||||
@ -941,12 +941,38 @@ varsub(Expand *xp, const char *sp, const char *word,
|
|||||||
} else {
|
} else {
|
||||||
p = str_val(global(sp));
|
p = str_val(global(sp));
|
||||||
zero_ok = p != null;
|
zero_ok = p != null;
|
||||||
c = stype == '#' ? (int)utflen(p) : utf_mbswidth(p);
|
if (stype == '#')
|
||||||
|
c = utflen(p);
|
||||||
|
else {
|
||||||
|
/* partial utf_mbswidth reimplementation */
|
||||||
|
const char *s = p;
|
||||||
|
unsigned int wc;
|
||||||
|
size_t len;
|
||||||
|
int cw;
|
||||||
|
|
||||||
|
c = 0;
|
||||||
|
while (*s) {
|
||||||
|
if (!UTFMODE || (len = utf_mbtowc(&wc,
|
||||||
|
s)) == (size_t)-1)
|
||||||
|
/* not UTFMODE or not UTF-8 */
|
||||||
|
wc = (unsigned char)(*s++);
|
||||||
|
else
|
||||||
|
/* UTFMODE and UTF-8 */
|
||||||
|
s += len;
|
||||||
|
/* wc == char or wchar at s++ */
|
||||||
|
if ((cw = utf_wcwidth(wc)) == -1) {
|
||||||
|
/* 646, 8859-1, 10646 C0/C1 */
|
||||||
|
c = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
c += cw;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
|
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
|
||||||
errorf("%s: parameter not set", sp);
|
errorf("%s: parameter not set", sp);
|
||||||
*stypep = 0; /* unqualified variable/string substitution */
|
*stypep = 0; /* unqualified variable/string substitution */
|
||||||
xp->str = shf_smprintf("%u", (unsigned int)c);
|
xp->str = shf_smprintf("%d", c);
|
||||||
return (XSUB);
|
return (XSUB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
61
main.c
61
main.c
@ -33,7 +33,7 @@
|
|||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.157 2009/12/05 17:43:47 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/main.c,v 1.158 2010/01/01 17:44:08 tg Exp $");
|
||||||
|
|
||||||
extern char **environ;
|
extern char **environ;
|
||||||
|
|
||||||
@ -1277,7 +1277,13 @@ maketemp(Area *ap, Temp_type type, struct temp **tlist)
|
|||||||
return (tp);
|
return (tp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We use a collision resolution algorithm similar to that of Python 2.5.4
|
||||||
|
* but with a slightly tweaked implementation written from scratch.
|
||||||
|
*/
|
||||||
|
|
||||||
#define INIT_TBLS 8 /* initial table size (power of 2) */
|
#define INIT_TBLS 8 /* initial table size (power of 2) */
|
||||||
|
#define PERTURB_SHIFT 5 /* see Python 2.5.4 Objects/dictobject.c */
|
||||||
|
|
||||||
static void texpand(struct table *, size_t);
|
static void texpand(struct table *, size_t);
|
||||||
static int tnamecmp(const void *, const void *);
|
static int tnamecmp(const void *, const void *);
|
||||||
@ -1307,8 +1313,8 @@ oaathash_full(register const uint8_t *bp)
|
|||||||
static void
|
static void
|
||||||
texpand(struct table *tp, size_t nsize)
|
texpand(struct table *tp, size_t nsize)
|
||||||
{
|
{
|
||||||
size_t i, osize = tp->size;
|
size_t i, j, osize = tp->size, perturb;
|
||||||
struct tbl *tblp, **p;
|
struct tbl *tblp, **pp;
|
||||||
struct tbl **ntblp, **otblp = tp->tbls;
|
struct tbl **ntblp, **otblp = tp->tbls;
|
||||||
|
|
||||||
ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap);
|
ntblp = alloc(nsize * sizeof(struct tbl *), tp->areap);
|
||||||
@ -1319,14 +1325,22 @@ texpand(struct table *tp, size_t nsize)
|
|||||||
tp->tbls = ntblp;
|
tp->tbls = ntblp;
|
||||||
if (otblp == NULL)
|
if (otblp == NULL)
|
||||||
return;
|
return;
|
||||||
|
nsize--; /* from here on nsize := mask */
|
||||||
for (i = 0; i < osize; i++)
|
for (i = 0; i < osize; i++)
|
||||||
if ((tblp = otblp[i]) != NULL) {
|
if ((tblp = otblp[i]) != NULL) {
|
||||||
if ((tblp->flag & DEFINED)) {
|
if ((tblp->flag & DEFINED)) {
|
||||||
for (p = &ntblp[tblp->ua.hval &
|
/* search for free hash table slot */
|
||||||
(tp->size - 1)]; *p != NULL; p--)
|
j = (perturb = tblp->ua.hval) & nsize;
|
||||||
if (p == ntblp) /* wrap */
|
goto find_first_empty_slot;
|
||||||
p += tp->size;
|
find_next_empty_slot:
|
||||||
*p = tblp;
|
j = (j << 2) + j + perturb + 1;
|
||||||
|
perturb >>= PERTURB_SHIFT;
|
||||||
|
find_first_empty_slot:
|
||||||
|
pp = &ntblp[j & nsize];
|
||||||
|
if (*pp != NULL)
|
||||||
|
goto find_next_empty_slot;
|
||||||
|
/* found an empty hash table slot */
|
||||||
|
*pp = tblp;
|
||||||
tp->nfree--;
|
tp->nfree--;
|
||||||
} else if (!(tblp->flag & FINUSE)) {
|
} else if (!(tblp->flag & FINUSE)) {
|
||||||
afree(tblp, tp->areap);
|
afree(tblp, tp->areap);
|
||||||
@ -1345,25 +1359,26 @@ ktinit(struct table *tp, Area *ap, size_t tsize)
|
|||||||
texpand(tp, tsize);
|
texpand(tp, tsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* table, name (key) to search for, hash(n) */
|
/* table, name (key) to search for, hash(name), rv pointer to tbl ptr */
|
||||||
static struct tbl *
|
static struct tbl *
|
||||||
ktscan(struct table *tp, const char *n, uint32_t h, struct tbl ***ppp)
|
ktscan(struct table *tp, const char *name, uint32_t h, struct tbl ***ppp)
|
||||||
{
|
{
|
||||||
|
size_t j, perturb, mask;
|
||||||
struct tbl **pp, *p;
|
struct tbl **pp, *p;
|
||||||
|
|
||||||
/* search for name in hashed table */
|
mask = tp->size - 1;
|
||||||
for (pp = &tp->tbls[h & (tp->size - 1)]; (p = *pp) != NULL; pp--) {
|
/* search for hash table slot matching name */
|
||||||
if (p->ua.hval == h && !strcmp(p->name, n) &&
|
j = (perturb = h) & mask;
|
||||||
(p->flag & DEFINED))
|
goto find_first_slot;
|
||||||
goto found;
|
find_next_slot:
|
||||||
if (pp == tp->tbls)
|
j = (j << 2) + j + perturb + 1;
|
||||||
/* wrap */
|
perturb >>= PERTURB_SHIFT;
|
||||||
pp += tp->size;
|
find_first_slot:
|
||||||
}
|
pp = &tp->tbls[j & mask];
|
||||||
/* not found */
|
if ((p = *pp) != NULL && (p->ua.hval != h || !(p->flag & DEFINED) ||
|
||||||
p = NULL;
|
strcmp(p->name, name)))
|
||||||
|
goto find_next_slot;
|
||||||
found:
|
/* p == NULL if not found, correct found entry otherwise */
|
||||||
if (ppp)
|
if (ppp)
|
||||||
*ppp = pp;
|
*ppp = pp;
|
||||||
return (p);
|
return (p);
|
||||||
|
9
mksh.1
9
mksh.1
@ -1,4 +1,4 @@
|
|||||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.202 2009/12/01 19:15:33 tg Exp $
|
.\" $MirOS: src/bin/mksh/mksh.1,v 1.203 2010/01/01 17:44:08 tg Exp $
|
||||||
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
|
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
|
||||||
.\"-
|
.\"-
|
||||||
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||||
@ -71,7 +71,7 @@
|
|||||||
.\" with -mandoc, it might implement .Mx itself, but we want to
|
.\" with -mandoc, it might implement .Mx itself, but we want to
|
||||||
.\" use our own definition. And .Dd must come *first*, always.
|
.\" use our own definition. And .Dd must come *first*, always.
|
||||||
.\"
|
.\"
|
||||||
.Dd $Mdocdate: December 1 2009 $
|
.Dd $Mdocdate: January 1 2010 $
|
||||||
.\"
|
.\"
|
||||||
.\" Check which macro package we use
|
.\" Check which macro package we use
|
||||||
.\"
|
.\"
|
||||||
@ -1452,7 +1452,10 @@ The number of elements in the array
|
|||||||
The width
|
The width
|
||||||
.Pq in screen columns
|
.Pq in screen columns
|
||||||
of the string value of parameter
|
of the string value of parameter
|
||||||
.Ar name .
|
.Ar name ,
|
||||||
|
or -1 if
|
||||||
|
.Pf ${ Ns Ar name Ns }
|
||||||
|
contains a control character.
|
||||||
.Pp
|
.Pp
|
||||||
.It Pf ${! Ns Ar name Ns }
|
.It Pf ${! Ns Ar name Ns }
|
||||||
The name of the variable referred to by
|
The name of the variable referred to by
|
||||||
|
4
sh.h
4
sh.h
@ -148,9 +148,9 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EXTERN
|
#ifdef EXTERN
|
||||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.371 2009/12/31 14:05:44 tg Exp $");
|
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.372 2010/01/01 17:44:10 tg Exp $");
|
||||||
#endif
|
#endif
|
||||||
#define MKSH_VERSION "R39 2009/12/31"
|
#define MKSH_VERSION "R39 2010/01/01"
|
||||||
|
|
||||||
#ifndef MKSH_INCLUDES_ONLY
|
#ifndef MKSH_INCLUDES_ONLY
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user