this began as a one-word change to the spec (manpage): that
${foo:1:2} operates on characters instead of bytes – which means: ‣ set +U: octets ‣ set -U: MirOS OPTU-8 characters. For consistency I also adapted ${#stringname} to deliver the length in characters instead of bytes; more may follow; for example I’d like a way to expose the string width. You can already get the MirOS OPTU-16 of a character in the WTF-8 (「set -U」) mode with something like │ typeset -Uui16 -Z7 x=1#${stringname:position:1} which will correctly use the PUA EF80‥EFFF mapping for octets. Due to this being an incompatible change, bump to R38; also change the unicode-hexdump sample regression test and add two new tests for ${x:1:2} and ${#x} checks in A/W mode ☺
This commit is contained in:
parent
8a7223d4cd
commit
cc783807d3
162
check.t
162
check.t
@ -1,4 +1,4 @@
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.273 2009/05/16 14:19:20 tg Exp $
|
||||
# $MirOS: src/bin/mksh/check.t,v 1.274 2009/05/16 15:53:00 tg Exp $
|
||||
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
|
||||
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
|
||||
@ -7,7 +7,7 @@
|
||||
# http://www.research.att.com/~gsf/public/ifs.sh
|
||||
|
||||
expected-stdout:
|
||||
@(#)MIRBSD KSH R37 2009/05/16
|
||||
@(#)MIRBSD KSH R38 2009/05/16
|
||||
description:
|
||||
Check version of shell.
|
||||
stdin:
|
||||
@ -4891,6 +4891,38 @@ stdin:
|
||||
expected-stdout:
|
||||
a defg a
|
||||
---
|
||||
name: varexpand-substr-5A
|
||||
description:
|
||||
Check that substring expansions work on characters
|
||||
stdin:
|
||||
set +U
|
||||
x=mäh
|
||||
print a ${x::1} ${x: -1} a
|
||||
print b ${x::3} ${x: -3} b
|
||||
print c ${x:1:2} ${x: -3:2} c
|
||||
print d ${#x} d
|
||||
expected-stdout:
|
||||
a m h a
|
||||
b mä äh b
|
||||
c ä ä c
|
||||
d 4 d
|
||||
---
|
||||
name: varexpand-substr-5W
|
||||
description:
|
||||
Check that substring expansions work on characters
|
||||
stdin:
|
||||
set -U
|
||||
x=mäh
|
||||
print a ${x::1} ${x: -1} a
|
||||
print b ${x::2} ${x: -2} b
|
||||
print c ${x:1:1} ${x: -2:1} c
|
||||
print d ${#x} d
|
||||
expected-stdout:
|
||||
a m h a
|
||||
b mä äh b
|
||||
c ä ä c
|
||||
d 3 d
|
||||
---
|
||||
name: print-funny-chars
|
||||
description:
|
||||
Check print builtin's capability to output designated characters
|
||||
@ -5080,7 +5112,7 @@ expected-stderr-pattern:
|
||||
/1#à€€: unexpected '€'/
|
||||
expected-exit: e != 0
|
||||
---
|
||||
name: integer-base-one-3a
|
||||
name: integer-base-one-3A
|
||||
description:
|
||||
some sample code for hexdumping
|
||||
stdin:
|
||||
@ -5147,7 +5179,7 @@ expected-stdout:
|
||||
00000110 EF F0 F1 F2 F3 F4 F5 F6 - F7 F8 F9 FA FB FC FD FE |................|
|
||||
00000120 FF 0A - |..|
|
||||
---
|
||||
name: integer-base-one-3b
|
||||
name: integer-base-one-3W
|
||||
description:
|
||||
some sample code for hexdumping Unicode
|
||||
stdin:
|
||||
@ -5169,7 +5201,7 @@ stdin:
|
||||
print '�￾￿' # end of range
|
||||
} | {
|
||||
typeset -Uui16 -Z11 pos=0
|
||||
typeset -Uui16 -Z5 hv
|
||||
typeset -Uui16 -Z7 hv
|
||||
typeset -i1 wc=0x0A
|
||||
dasc=
|
||||
nl=${wc#1#}
|
||||
@ -5177,27 +5209,7 @@ stdin:
|
||||
while IFS= read -r line; do
|
||||
line=$line$nl
|
||||
while [[ -n $line ]]; do
|
||||
(( hv = 1#${line::1} & 0xFF ))
|
||||
if (( (hv < 0xC2) || (hv >= 0xF0) )); then
|
||||
n=1
|
||||
elif (( hv < 0xE0 )); then
|
||||
n=2
|
||||
else
|
||||
n=3
|
||||
fi
|
||||
if (( n > 1 )); then
|
||||
(( (1#${line:1:1} & 0xC0) == 0x80 )) || n=1
|
||||
(( hv == 0xE0 )) && \
|
||||
(( (1#${line:1:1} & 0xFF) < 0xA0 )) && n=1
|
||||
fi
|
||||
if (( n > 2 )); then
|
||||
(( hv = 1#${line:2:1} & 0xFF ))
|
||||
(( (hv & 0xC0) == 0x80 )) || n=1
|
||||
(( (((1#${line::1} & 0xFF) == 0xEF) && \
|
||||
((1#${line:1:1} & 0xFF) == 0xBF) && \
|
||||
(hv > 0xBD)) )) && n=1
|
||||
fi
|
||||
wc=1#${line::n}
|
||||
wc=1#${line::1}
|
||||
if (( (wc < 32) || \
|
||||
((wc > 126) && (wc < 160)) )); then
|
||||
dch=.
|
||||
@ -5206,62 +5218,70 @@ stdin:
|
||||
else
|
||||
dch=${wc#1#}
|
||||
fi
|
||||
if (( (pos & 15) >= (n == 3 ? 14 : 15) )); then
|
||||
if (( (pos & 7) >= 7 )); then
|
||||
dasc=$dasc$dch
|
||||
dch=
|
||||
fi
|
||||
while (( n-- )); do
|
||||
if (( (pos & 15) == 0 )); then
|
||||
(( pos )) && print "$dasc|"
|
||||
print -n "${pos#16#} "
|
||||
dasc=' |'
|
||||
fi
|
||||
hv=1#${line::1}
|
||||
print -n "${hv#16#} "
|
||||
(( (pos++ & 15) == 7 )) && \
|
||||
print -n -- '- '
|
||||
line=${line:1}
|
||||
done
|
||||
if (( (pos & 7) == 0 )); then
|
||||
(( pos )) && print "$dasc|"
|
||||
print -n "${pos#16#} "
|
||||
dasc=' |'
|
||||
fi
|
||||
let hv=wc
|
||||
print -n "${hv#16#} "
|
||||
(( (pos++ & 7) == 3 )) && \
|
||||
print -n -- '- '
|
||||
line=${line:1}
|
||||
dasc=$dasc$dch
|
||||
done
|
||||
done
|
||||
if (( pos & 15 )); then
|
||||
while (( pos & 15 )); do
|
||||
print -n ' '
|
||||
(( (pos++ & 15) == 7 )) && print -n -- '- '
|
||||
if (( pos & 7 )); then
|
||||
while (( pos & 7 )); do
|
||||
print -n ' '
|
||||
(( (pos++ & 7) == 3 )) && print -n -- '- '
|
||||
done
|
||||
print "$dasc|"
|
||||
fi
|
||||
}
|
||||
expected-stdout:
|
||||
00000000 48 65 6C 6C 6F 2C 20 57 - 6F 72 6C 64 21 5C 0A E3 |Hello, World!\.こ|
|
||||
00000010 81 93 E3 82 93 E3 81 AB - E3 81 A1 E3 81 AF EF BC |んにちは!|
|
||||
00000020 81 0A 01 02 03 04 05 06 - 07 08 09 0A 0B 0C 0D 0E |...............|
|
||||
00000030 0F 10 11 12 13 14 15 16 - 17 18 19 1A 1B 1C 1D 1E |................|
|
||||
00000040 1F 20 21 22 23 24 25 26 - 27 28 29 2A 2B 2C 2D 2E |. !"#$%&'()*+,-.|
|
||||
00000050 2F 30 31 32 33 34 35 36 - 37 38 39 3A 3B 3C 3D 3E |/0123456789:;<=>|
|
||||
00000060 3F 40 41 42 43 44 45 46 - 47 48 49 4A 4B 4C 4D 4E |?@ABCDEFGHIJKLMN|
|
||||
00000070 4F 50 51 52 53 54 55 56 - 57 58 59 5A 5B 5C 5D 5E |OPQRSTUVWXYZ[\]^|
|
||||
00000080 5F 60 61 62 63 64 65 66 - 67 68 69 6A 6B 6C 6D 6E |_`abcdefghijklmn|
|
||||
00000090 6F 70 71 72 73 74 75 76 - 77 78 79 7A 7B 7C 7D 7E |opqrstuvwxyz{|}~|
|
||||
000000A0 7F C2 80 C2 81 C2 82 C2 - 83 C2 84 C2 85 C2 86 C2 |.........|
|
||||
000000B0 87 C2 88 C2 89 C2 8A C2 - 8B C2 8C C2 8D C2 8E C2 |........|
|
||||
000000C0 8F C2 90 C2 91 C2 92 C2 - 93 C2 94 C2 95 C2 96 C2 |........|
|
||||
000000D0 97 C2 98 C2 99 C2 9A C2 - 9B C2 9C C2 9D C2 9E C2 |........|
|
||||
000000E0 9F C2 A0 C2 A1 C2 A2 C2 - A3 C2 A4 C2 A5 C2 A6 C2 | ¡¢£¤¥¦§|
|
||||
000000F0 A7 C2 A8 C2 A9 C2 AA C2 - AB C2 AC C2 AD C2 AE C2 |¨©ª«¬®¯|
|
||||
00000100 AF C2 B0 C2 B1 C2 B2 C2 - B3 C2 B4 C2 B5 C2 B6 C2 |°±²³´µ¶·|
|
||||
00000110 B7 C2 B8 C2 B9 C2 BA C2 - BB C2 BC C2 BD C2 BE C2 |¸¹º»¼½¾¿|
|
||||
00000120 BF C3 80 C3 81 C3 82 C3 - 83 C3 84 C3 85 C3 86 C3 |ÀÁÂÃÄÅÆÇ|
|
||||
00000130 87 C3 88 C3 89 C3 8A C3 - 8B C3 8C C3 8D C3 8E C3 |ÈÉÊËÌÍÎÏ|
|
||||
00000140 8F C3 90 C3 91 C3 92 C3 - 93 C3 94 C3 95 C3 96 C3 |ÐÑÒÓÔÕÖ×|
|
||||
00000150 97 C3 98 C3 99 C3 9A C3 - 9B C3 9C C3 9D C3 9E C3 |ØÙÚÛÜÝÞß|
|
||||
00000160 9F C3 A0 C3 A1 C3 A2 C3 - A3 C3 A4 C3 A5 C3 A6 C3 |àáâãäåæç|
|
||||
00000170 A7 C3 A8 C3 A9 C3 AA C3 - AB C3 AC C3 AD C3 AE C3 |èéêëìíîï|
|
||||
00000180 AF C3 B0 C3 B1 C3 B2 C3 - B3 C3 B4 C3 B5 C3 B6 C3 |ðñòóôõö÷|
|
||||
00000190 B7 C3 B8 C3 B9 C3 BA C3 - BB C3 BC C3 BD C3 BE C3 |øùúûüýþÿ|
|
||||
000001A0 BF 0A FF 0A C2 0A EF BF - C0 0A C0 80 0A E0 80 80 |.<EFBFBD>.<EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>|
|
||||
000001B0 0A EF BF BD EF BF BE EF - BF BF 0A |.<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.|
|
||||
00000000 0048 0065 006C 006C - 006F 002C 0020 0057 |Hello, W|
|
||||
00000008 006F 0072 006C 0064 - 0021 005C 000A 3053 |orld!\.こ|
|
||||
00000010 3093 306B 3061 306F - FF01 000A 0001 0002 |んにちは!...|
|
||||
00000018 0003 0004 0005 0006 - 0007 0008 0009 000A |........|
|
||||
00000020 000B 000C 000D 000E - 000F 0010 0011 0012 |........|
|
||||
00000028 0013 0014 0015 0016 - 0017 0018 0019 001A |........|
|
||||
00000030 001B 001C 001D 001E - 001F 0020 0021 0022 |..... !"|
|
||||
00000038 0023 0024 0025 0026 - 0027 0028 0029 002A |#$%&'()*|
|
||||
00000040 002B 002C 002D 002E - 002F 0030 0031 0032 |+,-./012|
|
||||
00000048 0033 0034 0035 0036 - 0037 0038 0039 003A |3456789:|
|
||||
00000050 003B 003C 003D 003E - 003F 0040 0041 0042 |;<=>?@AB|
|
||||
00000058 0043 0044 0045 0046 - 0047 0048 0049 004A |CDEFGHIJ|
|
||||
00000060 004B 004C 004D 004E - 004F 0050 0051 0052 |KLMNOPQR|
|
||||
00000068 0053 0054 0055 0056 - 0057 0058 0059 005A |STUVWXYZ|
|
||||
00000070 005B 005C 005D 005E - 005F 0060 0061 0062 |[\]^_`ab|
|
||||
00000078 0063 0064 0065 0066 - 0067 0068 0069 006A |cdefghij|
|
||||
00000080 006B 006C 006D 006E - 006F 0070 0071 0072 |klmnopqr|
|
||||
00000088 0073 0074 0075 0076 - 0077 0078 0079 007A |stuvwxyz|
|
||||
00000090 007B 007C 007D 007E - 007F 0080 0081 0082 |{|}~....|
|
||||
00000098 0083 0084 0085 0086 - 0087 0088 0089 008A |........|
|
||||
000000A0 008B 008C 008D 008E - 008F 0090 0091 0092 |........|
|
||||
000000A8 0093 0094 0095 0096 - 0097 0098 0099 009A |........|
|
||||
000000B0 009B 009C 009D 009E - 009F 00A0 00A1 00A2 |..... ¡¢|
|
||||
000000B8 00A3 00A4 00A5 00A6 - 00A7 00A8 00A9 00AA |£¤¥¦§¨©ª|
|
||||
000000C0 00AB 00AC 00AD 00AE - 00AF 00B0 00B1 00B2 |«¬®¯°±²|
|
||||
000000C8 00B3 00B4 00B5 00B6 - 00B7 00B8 00B9 00BA |³´µ¶·¸¹º|
|
||||
000000D0 00BB 00BC 00BD 00BE - 00BF 00C0 00C1 00C2 |»¼½¾¿ÀÁÂ|
|
||||
000000D8 00C3 00C4 00C5 00C6 - 00C7 00C8 00C9 00CA |ÃÄÅÆÇÈÉÊ|
|
||||
000000E0 00CB 00CC 00CD 00CE - 00CF 00D0 00D1 00D2 |ËÌÍÎÏÐÑÒ|
|
||||
000000E8 00D3 00D4 00D5 00D6 - 00D7 00D8 00D9 00DA |ÓÔÕÖ×ØÙÚ|
|
||||
000000F0 00DB 00DC 00DD 00DE - 00DF 00E0 00E1 00E2 |ÛÜÝÞßàáâ|
|
||||
000000F8 00E3 00E4 00E5 00E6 - 00E7 00E8 00E9 00EA |ãäåæçèéê|
|
||||
00000100 00EB 00EC 00ED 00EE - 00EF 00F0 00F1 00F2 |ëìíîïðñò|
|
||||
00000108 00F3 00F4 00F5 00F6 - 00F7 00F8 00F9 00FA |óôõö÷øùú|
|
||||
00000110 00FB 00FC 00FD 00FE - 00FF 000A EFFF 000A |ûüýþÿ.<EFBFBD>.|
|
||||
00000118 EFC2 000A EFEF EFBF - EFC0 000A EFC0 EF80 |<EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD>|
|
||||
00000120 000A EFE0 EF80 EF80 - 000A FFFD EFEF EFBF |.<EFBFBD><EFBFBD><EFBFBD>.<EFBFBD><EFBFBD><EFBFBD>|
|
||||
00000128 EFBE EFEF EFBF EFBF - 000A |<EFBFBD><EFBFBD><EFBFBD><EFBFBD>.|
|
||||
---
|
||||
name: ulimit-1
|
||||
description:
|
||||
|
48
eval.c
48
eval.c
@ -2,7 +2,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.54 2009/04/07 18:41:35 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.55 2009/05/16 15:53:01 tg Exp $");
|
||||
|
||||
#ifdef MKSH_SMALL
|
||||
#define MKSH_NOPWNAM
|
||||
@ -51,6 +51,35 @@ static char *tilde(char *);
|
||||
static char *homedir(char *);
|
||||
#endif
|
||||
static void alt_expand(XPtrV *, char *, char *, char *, int);
|
||||
static size_t utflen(const char *);
|
||||
static void utfincptr(const char *, mksh_ari_t *);
|
||||
|
||||
/* UTFMODE functions */
|
||||
static size_t
|
||||
utflen(const char *s)
|
||||
{
|
||||
size_t n;
|
||||
|
||||
if (UTFMODE) {
|
||||
n = 0;
|
||||
while (*s) {
|
||||
s += utf_ptradj(s);
|
||||
++n;
|
||||
}
|
||||
} else
|
||||
n = strlen(s);
|
||||
return (n);
|
||||
}
|
||||
|
||||
static void
|
||||
utfincptr(const char *s, mksh_ari_t *lp)
|
||||
{
|
||||
const char *cp = s;
|
||||
|
||||
while ((*lp)--)
|
||||
cp += utf_ptradj(cp);
|
||||
*lp = cp - s;
|
||||
}
|
||||
|
||||
/* compile and expand word */
|
||||
char *
|
||||
@ -314,7 +343,7 @@ expand(const char *cp, /* input word */
|
||||
switch (stype & 0x7f) {
|
||||
case '0': {
|
||||
char *beg, *mid, *end, *stg;
|
||||
mksh_ari_t from = 0, num = -1, flen;
|
||||
mksh_ari_t from = 0, num = -1, flen, finc = 0;
|
||||
|
||||
/* ! DOBLANK,DOBRACE_,DOTILDE */
|
||||
f = DOPAT | (f&DONTRUNCOMMAND) |
|
||||
@ -347,15 +376,20 @@ expand(const char *cp, /* input word */
|
||||
}
|
||||
afree(beg, ATEMP);
|
||||
beg = str_val(st->var);
|
||||
flen = strlen(beg);
|
||||
flen = utflen(beg);
|
||||
if (from < 0) {
|
||||
if (-from < flen)
|
||||
beg += flen + from;
|
||||
finc = flen + from;
|
||||
} else
|
||||
beg += from < flen ? from : flen;
|
||||
flen = strlen(beg);
|
||||
finc = from < flen ? from : flen;
|
||||
// if (UTFMODE)
|
||||
utfincptr(beg, &finc);
|
||||
beg += finc;
|
||||
flen = utflen(beg);
|
||||
if (num < 0 || num > flen)
|
||||
num = flen;
|
||||
// if (UTFMODE)
|
||||
utfincptr(beg, &num);
|
||||
strndupx(x.str, beg, num, ATEMP);
|
||||
goto do_CSUBST;
|
||||
}
|
||||
@ -913,7 +947,7 @@ varsub(Expand *xp, const char *sp, const char *word,
|
||||
else {
|
||||
p = str_val(global(sp));
|
||||
zero_ok = p != null;
|
||||
c = strlen(p);
|
||||
c = utflen(p);
|
||||
}
|
||||
if (Flag(FNOUNSET) && c == 0 && !zero_ok)
|
||||
errorf("%s: parameter not set", sp);
|
||||
|
10
mksh.1
10
mksh.1
@ -1,4 +1,4 @@
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.163 2009/05/16 14:45:40 tg Exp $
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.164 2009/05/16 15:53:01 tg Exp $
|
||||
.\" $OpenBSD: ksh.1,v 1.128 2009/03/06 12:28:36 jmc Exp $
|
||||
.\"-
|
||||
.\" Try to make GNU groff and AT&T nroff more compatible
|
||||
@ -1229,7 +1229,9 @@ The number of positional parameters if
|
||||
is
|
||||
.Ql * ,
|
||||
.Ql @ ,
|
||||
or not specified; otherwise the length of the string value of parameter
|
||||
or not specified; otherwise the length
|
||||
.Pq in characters
|
||||
of the string value of parameter
|
||||
.Ar name .
|
||||
.Pp
|
||||
.It Pf ${# Ns Ar name Ns [*]}
|
||||
@ -1309,7 +1311,7 @@ is replaced by the empty string, i.e. deleted.
|
||||
.Sm on
|
||||
The first
|
||||
.Ar len
|
||||
bytes of
|
||||
characters of
|
||||
.Ar name ,
|
||||
starting at position
|
||||
.Ar pos ,
|
||||
@ -5612,7 +5614,7 @@ all contributors, such as the Debian and OpenBSD projects.
|
||||
See the documentation, CVS, and web site for details.
|
||||
.Sh BUGS
|
||||
This document attempts to describe
|
||||
.Nm mksh\ R37c
|
||||
.Nm mksh\ R38
|
||||
and up,
|
||||
compiled without any options impacting functionality, such as
|
||||
.Dv MKSH_SMALL ,
|
||||
|
4
sh.h
4
sh.h
@ -102,9 +102,9 @@
|
||||
#define __SCCSID(x) __IDSTRING(sccsid,x)
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.295 2009/05/16 15:09:07 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.296 2009/05/16 15:53:02 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R37 2009/05/16"
|
||||
#define MKSH_VERSION "R38 2009/05/16"
|
||||
|
||||
#ifndef MKSH_INCLUDES_ONLY
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user