$'…' functionality, documentation improvements, fixes for backslash

expansion in all modes, regression tests for both kinds of backslash
expansion; unbksl() revamp; make CTRL macro available globally
This commit is contained in:
tg 2009-09-19 21:54:46 +00:00
parent c8eb13a13f
commit 9fd4b9db41
7 changed files with 347 additions and 65 deletions

122
check.t
View File

@ -1,4 +1,4 @@
# $MirOS: src/bin/mksh/check.t,v 1.306 2009/09/19 18:36:57 tg Exp $ # $MirOS: src/bin/mksh/check.t,v 1.307 2009/09/19 21:54:42 tg Exp $
# $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: bksl-nl.t,v 1.2 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $ # $OpenBSD: history.t,v 1.5 2001/01/28 23:04:56 niklas Exp $
# $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $ # $OpenBSD: read.t,v 1.3 2003/03/10 03:48:16 david Exp $
@ -25,7 +25,7 @@
# http://www.research.att.com/~gsf/public/ifs.sh # http://www.research.att.com/~gsf/public/ifs.sh
expected-stdout: expected-stdout:
@(#)MIRBSD KSH R39 2009/09/07 @(#)MIRBSD KSH R39 2009/09/19
description: description:
Check version of shell. Check version of shell.
stdin: stdin:
@ -5210,6 +5210,14 @@ stdin:
expected-stdout: expected-stdout:
<däÛÃâ¬Ã@> <däÛÃâ¬Ã@>
--- ---
name: print-bksl-c
description:
Check print builtin's \c escape
stdin:
print '\ca'; print b
expected-stdout:
ab
---
name: print-nul-chars name: print-nul-chars
description: description:
Check handling of NUL characters for print and read Check handling of NUL characters for print and read
@ -5223,6 +5231,116 @@ expected-stdout:
4 4
3 2 3 2
--- ---
name: print-escapes
description:
Check backslash expansion by the print builtin
stdin:
print '\ \!\"\#\$\%\&'\\\''\(\)\*\+\,\-\.\/\0\1\2\3\4\5\6\7\8' \
'\9\:\;\<\=\>\?\@\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T' \
'\U\V\W\X\Y\Z\[\\\]\^\_\`\a\b \d\e\f\g\h\i\j\k\l\m\n\o\p' \
'\q\r\s\t\u\v\w\x\y\z\{\|\}\~' '\u20acd' '\U20acd' '\x123' \
'\0x' '\0123' '\01234' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
hv=1#${line::1}
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
print -n "${hv#16#} "
if (( (hv < 32) || (hv > 126) )); then
dasc=$dasc.
else
dasc=$dasc${line::1}
fi
(( (pos++ & 15) == 7 )) && print -n -- '- '
line=${line:1}
done
done
if (( (pos & 15) != 1 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -n -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 5C 20 5C 21 5C 22 5C 23 - 5C 24 5C 25 5C 26 5C 27 |\ \!\"\#\$\%\&\'|
00000010 5C 28 5C 29 5C 2A 5C 2B - 5C 2C 5C 2D 5C 2E 5C 2F |\(\)\*\+\,\-\.\/|
00000020 5C 31 5C 32 5C 33 5C 34 - 5C 35 5C 36 5C 37 5C 38 |\1\2\3\4\5\6\7\8|
00000030 20 5C 39 5C 3A 5C 3B 5C - 3C 5C 3D 5C 3E 5C 3F 5C | \9\:\;\<\=\>\?\|
00000040 40 5C 41 5C 42 5C 43 5C - 44 1B 5C 46 5C 47 5C 48 |@\A\B\C\D.\F\G\H|
00000050 5C 49 5C 4A 5C 4B 5C 4C - 5C 4D 5C 4E 5C 4F 5C 50 |\I\J\K\L\M\N\O\P|
00000060 5C 51 5C 52 5C 53 5C 54 - 20 5C 56 5C 57 5C 58 5C |\Q\R\S\T \V\W\X\|
00000070 59 5C 5A 5C 5B 5C 5C 5D - 5C 5E 5C 5F 5C 60 07 08 |Y\Z\[\]\^\_\`..|
00000080 20 20 5C 64 1B 0C 5C 67 - 5C 68 5C 69 5C 6A 5C 6B | \d..\g\h\i\j\k|
00000090 5C 6C 5C 6D 0A 5C 6F 5C - 70 20 5C 71 0D 5C 73 09 |\l\m.\o\p \q.\s.|
000000A0 0B 5C 77 5C 79 5C 7A 5C - 7B 5C 7C 5C 7D 5C 7E 20 |.\w\y\z\{\|\}\~ |
000000B0 E2 82 AC 64 20 EF BF BD - 20 12 33 20 78 20 53 20 |...d ... .3 x S |
000000C0 53 34 0A - |S4.|
---
name: dollar-quoted-strings
description:
Check backslash expansion by $'' strings
stdin:
printf '%s\n' $'\ \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/ \1\2\3\4\5\6' \
$'a\0b' $'a\01b' $'\7\8\9\:\;\<\=\>\?\@\A\B\C\D\E\F\G\H\I' \
$'\J\K\L\M\N\O\P\Q\R\S\T\U1\V\W\X\Y\Z\[\\\]\^\_\`\a\b\d\e' \
$'\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u1\v\w\x1\y\z\{\|\}\~ $x' \
$'\u20acd' $'\U20acd' $'\x123' $'fn\x0rd' $'\0234' $'\234' \
$'\2345' $'\ca' $'\c!' $'\c?' $'\c' $'a\
b' | {
typeset -Uui16 -Z11 pos=0
typeset -Uui16 -Z5 hv
typeset -i1 wc=0x0A
dasc=
nl=${wc#1#}
while IFS= read -r line; do
line=$line$nl
while [[ -n $line ]]; do
hv=1#${line::1}
if (( (pos & 15) == 0 )); then
(( pos )) && print "$dasc|"
print -n "${pos#16#} "
dasc=' |'
fi
print -n "${hv#16#} "
if (( (hv < 32) || (hv > 126) )); then
dasc=$dasc.
else
dasc=$dasc${line::1}
fi
(( (pos++ & 15) == 7 )) && print -n -- '- '
line=${line:1}
done
done
if (( (pos & 15) != 1 )); then
while (( pos & 15 )); do
print -n ' '
(( (pos++ & 15) == 7 )) && print -n -- '- '
done
print "$dasc|"
fi
}
expected-stdout:
00000000 20 21 22 23 24 25 26 27 - 28 29 2A 2B 2C 2D 2E 2F | !"#$%&'()*+,-./|
00000010 20 01 02 03 04 05 06 0A - 61 0A 61 01 62 0A 07 38 | .......a.a.b..8|
00000020 39 3A 3B 3C 3D 3E 3F 40 - 41 42 43 44 1B 46 47 48 |9:;<=>?@ABCD.FGH|
00000030 49 0A 4A 4B 4C 4D 4E 4F - 50 51 52 53 54 01 56 57 |I.JKLMNOPQRST.VW|
00000040 58 59 5A 5B 5C 5D 5E 5F - 60 07 08 64 1B 0A 0C 67 |XYZ[\]^_`..d...g|
00000050 68 69 6A 6B 6C 6D 0A 6F - 70 71 0D 73 09 01 0B 77 |hijklm.opq.s...w|
00000060 01 79 7A 7B 7C 7D 7E 20 - 24 78 0A E2 82 AC 64 0A |.yz{|}~ $x....d.|
00000070 EF BF BD 0A C4 A3 0A 66 - 6E 0A 13 34 0A 9C 0A 9C |.......fn..4....|
00000080 35 0A 01 0A 01 0A 7F 0A - 02 82 AC 0A 61 0A 62 0A |5...........a.b.|
---
name: dot-needs-argument name: dot-needs-argument
description: description:
check Debian #415167 solution: '.' without arguments should fail check Debian #415167 solution: '.' without arguments should fail

6
edit.c
View File

@ -25,7 +25,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/edit.c,v 1.175 2009/08/28 20:30:54 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/edit.c,v 1.176 2009/09/19 21:54:43 tg Exp $");
/* tty driver characters we are interested in */ /* tty driver characters we are interested in */
typedef struct { typedef struct {
@ -964,10 +964,6 @@ utf_wctomb(char *dst, unsigned int wc)
static Area aedit; static Area aedit;
#define AEDIT &aedit /* area for kill ring and macro defns */ #define AEDIT &aedit /* area for kill ring and macro defns */
#undef CTRL
#define CTRL(x) ((x) == '?' ? 0x7F : (x) & 0x1F) /* ASCII */
#define UNCTRL(x) ((x) ^ 0x40) /* ASCII */
/* values returned by keyboard functions */ /* values returned by keyboard functions */
#define KSTD 0 #define KSTD 0
#define KEOL 1 /* ^M, ^J */ #define KEOL 1 /* ^M, ^J */

25
funcs.c
View File

@ -25,7 +25,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.132 2009/09/19 19:08:46 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.133 2009/09/19 21:54:44 tg Exp $");
#if HAVE_KILLPG #if HAVE_KILLPG
/* /*
@ -495,6 +495,10 @@ c_pwd(const char **wp)
return (0); return (0);
} }
static const char *s_ptr;
static int s_get(void);
static void s_put(int);
int int
c_print(const char **wp) c_print(const char **wp)
{ {
@ -604,7 +608,10 @@ c_print(const char **wp)
while ((c = *s++) != '\0') { while ((c = *s++) != '\0') {
Xcheck(xs, xp); Xcheck(xs, xp);
if ((flags & PO_EXPAND) && c == '\\') { if ((flags & PO_EXPAND) && c == '\\') {
if ((c = unbksl(&s, false)) == -1) { s_ptr = s;
c = unbksl(false, s_get, s_put);
s = s_ptr;
if (c == -1) {
/* rejected by generic function */ /* rejected by generic function */
switch ((c = *s++)) { switch ((c = *s++)) {
case 'c': case 'c':
@ -618,7 +625,7 @@ c_print(const char **wp)
default: default:
Xput(xs, xp, '\\'); Xput(xs, xp, '\\');
} }
} else if (c > 0xFF) { } else if ((unsigned int)c > 0xFF) {
/* generic function returned Unicode */ /* generic function returned Unicode */
char ts[4]; char ts[4];
@ -678,6 +685,18 @@ c_print(const char **wp)
return (0); return (0);
} }
/*
 * character source handed to unbksl() by c_print(): yields the
 * character under the file-scope cursor s_ptr and then moves the
 * cursor one position forward
 */
static int
s_get(void)
{
	int ch;

	ch = *s_ptr;
	++s_ptr;
	return (ch);
}
/*
 * push-back counterpart of s_get(): un-reading is done by moving the
 * file-scope cursor s_ptr one position back; the character passed in
 * is not looked at
 */
static void
s_put(int c __unused)
{
	s_ptr--;
}
int int
c_whence(const char **wp) c_whence(const char **wp)
{ {

79
lex.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.93 2009/08/28 22:39:09 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.94 2009/09/19 21:54:45 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@ -43,6 +43,7 @@ __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.93 2009/08/28 22:39:09 tg Exp $");
#define SLETARRAY 13 /* inside =( ), just copy */ #define SLETARRAY 13 /* inside =( ), just copy */
#define SADELIM 14 /* like SBASE, looking for delimiter */ #define SADELIM 14 /* like SBASE, looking for delimiter */
#define SHERESTRING 15 /* parsing <<< string */ #define SHERESTRING 15 /* parsing <<< string */
#define SEQUOTE 16 /* inside $'' */
/* Structure to keep track of the lexing state and the various pieces of info /* Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state. */ * needed for each particular state. */
@ -94,6 +95,12 @@ struct lex_state {
#define ls_sadelim ls_info.u_sadelim #define ls_sadelim ls_info.u_sadelim
} u_sadelim; } u_sadelim;
/* $'...' */
struct sequote_info {
bool got_NUL; /* ignore rest of string */
#define ls_sequote ls_info.u_sequote
} u_sequote;
Lex_state *base; /* used to point to next state block */ Lex_state *base; /* used to point to next state block */
} ls_info; } ls_info;
}; };
@ -107,6 +114,8 @@ static void readhere(struct ioword *);
static int getsc__(void); static int getsc__(void);
static void getsc_line(Source *); static void getsc_line(Source *);
static int getsc_bn(void); static int getsc_bn(void);
static int s_get(void);
static void s_put(int);
static char *get_brace_var(XString *, char *); static char *get_brace_var(XString *, char *);
static int arraysub(char **); static int arraysub(char **);
static const char *ungetsc(int); static const char *ungetsc(int);
@ -154,11 +163,10 @@ yylex(int cf)
{ {
Lex_state states[STATE_BSIZE], *statep, *s2, *base; Lex_state states[STATE_BSIZE], *statep, *s2, *base;
State_info state_info; State_info state_info;
int c, state; int c, c2, state;
XString ws; /* expandable output word */ XString ws; /* expandable output word */
char *wp; /* output word pointer */ char *wp; /* output word pointer */
char *sp, *dp; char *sp, *dp;
int c2;
Again: Again:
states[0].ls_state = -1; states[0].ls_state = -1;
@ -427,6 +435,12 @@ yylex(int cf)
*wp++ = '\0'; *wp++ = '\0';
*wp++ = CSUBST; *wp++ = CSUBST;
*wp++ = 'X'; *wp++ = 'X';
} else if (c == '\'') {
*wp++ = OQUOTE;
ignore_backslash_newline++;
PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false;
break;
} else { } else {
*wp++ = CHAR, *wp++ = '$'; *wp++ = CHAR, *wp++ = '$';
ungetsc(c); ungetsc(c);
@ -484,6 +498,33 @@ yylex(int cf)
} }
break; break;
case SEQUOTE:
if (c == '\'') {
POP_STATE();
*wp++ = CQUOTE;
ignore_backslash_newline--;
} else if (c == '\\') {
if ((c2 = unbksl(true, s_get, s_put)) == -1)
c2 = s_get();
if (c2 == 0 || c2 == 0x100)
statep->ls_sequote.got_NUL = true;
if (!statep->ls_sequote.got_NUL) {
char ts[4];
if ((unsigned int)c2 < 0x100)
*wp++ = QCHAR, *wp++ = c2;
else {
c = utf_wctomb(ts, c2 - 0x100);
ts[c] = 0;
for (c = 0; ts[c]; ++c)
*wp++ = QCHAR, \
*wp++ = ts[c];
}
}
} else if (!statep->ls_sequote.got_NUL)
*wp++ = QCHAR, *wp++ = c;
break;
case SSQUOTE: case SSQUOTE:
if (c == '\'') { if (c == '\'') {
POP_STATE(); POP_STATE();
@ -690,8 +731,17 @@ yylex(int cf)
} }
/* invoke quoting mode */ /* invoke quoting mode */
Xstring(ws, wp)[0] = QCHAR; Xstring(ws, wp)[0] = QCHAR;
} else if (c == '$') {
if ((c2 = getsc()) == '\'') {
PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false;
goto sherestring_quoted;
}
ungetsc(c2);
goto sherestring_regular;
} else if (c == '\'') { } else if (c == '\'') {
PUSH_STATE(SSQUOTE); PUSH_STATE(SSQUOTE);
sherestring_quoted:
*wp++ = OQUOTE; *wp++ = OQUOTE;
ignore_backslash_newline++; ignore_backslash_newline++;
/* invoke quoting mode */ /* invoke quoting mode */
@ -701,6 +751,7 @@ yylex(int cf)
*wp++ = OQUOTE; *wp++ = OQUOTE;
/* just don't IFS split; no quoting mode */ /* just don't IFS split; no quoting mode */
} else { } else {
sherestring_regular:
*wp++ = CHAR; *wp++ = CHAR;
*wp++ = c; *wp++ = c;
} }
@ -721,14 +772,24 @@ yylex(int cf)
*wp++ = QCHAR; *wp++ = QCHAR;
*wp++ = c; *wp++ = c;
} }
} else if (c == '$') {
if ((c2 = getsc()) == '\'') {
PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false;
goto sheredelim_quoted;
}
ungetsc(c2);
goto sheredelim_regular;
} else if (c == '\'') { } else if (c == '\'') {
PUSH_STATE(SSQUOTE); PUSH_STATE(SSQUOTE);
sheredelim_quoted:
*wp++ = OQUOTE; *wp++ = OQUOTE;
ignore_backslash_newline++; ignore_backslash_newline++;
} else if (c == '"') { } else if (c == '"') {
state = statep->ls_state = SHEREDQUOTE; state = statep->ls_state = SHEREDQUOTE;
*wp++ = OQUOTE; *wp++ = OQUOTE;
} else { } else {
sheredelim_regular:
*wp++ = CHAR; *wp++ = CHAR;
*wp++ = c; *wp++ = c;
} }
@ -1604,3 +1665,15 @@ pop_state_(State_info *si, Lex_state *old_end)
return (si->base + STATE_BSIZE - 1); return (si->base + STATE_BSIZE - 1);
} }
/*
 * character source handed to unbksl() by the lexer: fetches the
 * next character through getsc()
 */
static int
s_get(void)
{
	int ch;

	ch = getsc();
	return (ch);
}
/*
 * push-back counterpart of the lexer's s_get(): returns the character
 * to the input through ungetsc(); the value ungetsc() hands back is
 * not needed here
 */
static void
s_put(int c)
{
	(void)ungetsc(c);
}

69
misc.c
View File

@ -29,7 +29,7 @@
#include <grp.h> #include <grp.h>
#endif #endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.120 2009/09/19 19:08:47 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/misc.c,v 1.121 2009/09/19 21:54:45 tg Exp $");
#undef USE_CHVT #undef USE_CHVT
/* XXX conditions correct? */ /* XXX conditions correct? */
@ -1449,19 +1449,20 @@ getrusage(int what, struct rusage *ru)
#endif #endif
/* /*
* process the string at *sp for backslash escapes, * process the string available via fg (get a char)
* assuming (*sp)[-1] was the backslash; return the * and fp (put back a char) for backslash escapes,
* character ([0;0xFF]), Unicode (wc+0x100), or -1 * assuming the first call to *fg gets the char di-
* if none found; *sp afterwards points to the first * rectly after the backslash; return the character
* unprocessed character (unchanged if rv=-1) * (0..0xFF), Unicode (wc + 0x100), or -1 if no known
* escape sequence was found
*/ */
int int
unbksl(const char **sp, bool cstyle) unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
{ {
int wc, i; int wc, i, c, fc;
const char *cp = (*sp);
switch (*cp++) { fc = (*fg)();
switch (fc) {
case 'a': case 'a':
/* /*
* according to the comments in pdksh, \007 seems * according to the comments in pdksh, \007 seems
@ -1474,6 +1475,16 @@ unbksl(const char **sp, bool cstyle)
case 'b': case 'b':
wc = '\b'; wc = '\b';
break; break;
case 'c':
if (!cstyle)
goto unknown_escape;
c = (*fg)();
wc = CTRL(c);
break;
case 'E':
case 'e':
wc = 033;
break;
case 'f': case 'f':
wc = '\f'; wc = '\f';
break; break;
@ -1497,14 +1508,12 @@ unbksl(const char **sp, bool cstyle)
case '5': case '5':
case '6': case '6':
case '7': case '7':
case '8':
case '9':
if (!cstyle) if (!cstyle)
return (-1); goto unknown_escape;
/* FALLTHROUGH */ /* FALLTHROUGH */
case '0': case '0':
if (cstyle) if (cstyle)
--cp; (*fp)(fc);
/* /*
* look for an octal number with up to three * look for an octal number with up to three
* digits, not counting the leading zero; * digits, not counting the leading zero;
@ -1512,8 +1521,13 @@ unbksl(const char **sp, bool cstyle)
*/ */
wc = 0; wc = 0;
i = 3; i = 3;
while (i-- && *cp >= '0' && *cp <= '7') while (i--)
wc = (wc << 3) + (*cp++ - '0'); if ((c = (*fg)()) >= '0' && c <= '7')
wc = (wc << 3) + (c - '0');
else {
(*fp)(c);
break;
}
break; break;
case 'U': case 'U':
i = 8; i = 8;
@ -1535,28 +1549,35 @@ unbksl(const char **sp, bool cstyle)
wc = 0; wc = 0;
while (i--) { while (i--) {
wc <<= 4; wc <<= 4;
if (*cp >= '0' && *cp <= '9') if ((c = (*fg)()) >= '0' && c <= '9')
wc += *cp++ - '0'; wc += c - '0';
else if (*cp >= 'A' && *cp <= 'F') else if (c >= 'A' && c <= 'F')
wc += *cp++ - 'A' + 10; wc += c - 'A' + 10;
else if (*cp >= 'a' && *cp <= 'f') else if (c >= 'a' && c <= 'f')
wc += *cp++ - 'a' + 10; wc += c - 'a' + 10;
else { else {
wc >>= 4; wc >>= 4;
(*fp)(c);
break; break;
} }
} }
if (cstyle || **sp != 'x') if (cstyle || fc != 'x')
/* Unicode marker */ /* Unicode marker */
wc += 0x100; wc += 0x100;
break; break;
case '\'':
if (!cstyle)
goto unknown_escape;
wc = '\'';
break;
case '\\': case '\\':
wc = '\\'; wc = '\\';
break; break;
default: default:
unknown_escape:
(*fp)(fc);
return (-1); return (-1);
} }
(*sp) = cp;
return (wc); return (wc);
} }

101
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.187 2009/09/19 19:08:47 tg Exp $ .\" $MirOS: src/bin/mksh/mksh.1,v 1.188 2009/09/19 21:54:45 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $ .\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"- .\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -877,6 +877,12 @@ the
and the newline are stripped; otherwise, both the and the newline are stripped; otherwise, both the
.Ql \e .Ql \e
and the character following are unchanged. and the character following are unchanged.
.Pp
If a single-quoted string is preceded by an unquoted
.Ql $ ,
C style backslash expansion (see below) is applied (even single quote
characters inside can be escaped and do not terminate the string then);
the expanded result is treated as any other single-quoted string.
.Ss Backslash expansion .Ss Backslash expansion
In places where backslashes are expanded, certain C and In places where backslashes are expanded, certain C and
.At .At
@ -884,11 +890,6 @@ In places where backslashes are expanded, certain C and
or GNU or GNU
.Nm bash .Nm bash
style escapes are translated. style escapes are translated.
Explicitly excluded are
.Ql \e" ,
.Ql \e' ,
and
.Ql \e? .
These include These include
.Ql \ea , .Ql \ea ,
.Ql \eb , .Ql \eb ,
@ -898,16 +899,62 @@ These include
.Ql \et , .Ql \et ,
.Ql \eU######## , .Ql \eU######## ,
.Ql \eu#### , .Ql \eu#### ,
.Ql \ev ,
.Ql \ex## ,
and and
.Ql \e0### ; .Ql \ev .
.Ql # For
is, in the case of \e0###, an octal, or, in the case of \ex##, .Ql \eU########
\eu#### or \eU########, a hexadecimal digit, of which there may and
be none up to two (x), three (0), four (u), or eight (U). .Ql \eu#### ,
The \ex## and \e0### escapes translate to raw 8-bit octets; .Dq #
the \eu#### and \eU######## escapes translate a Unicode codepoint to UTF-8. means a hexadecimal digit, of which there may be none up to four or eight;
these escapes translate a Unicode codepoint to UTF-8.
Furthermore,
.Ql \eE
and
.Ql \ee
expand to the escape character.
.Pp
In the
.Ic print
builtin mode,
.Ql \e" ,
.Ql \e\*(aq ,
and
.Ql \e?
are explicitly excluded;
octal sequences must have the none up to three octal digits
.Dq #
prefixed with the digit zero
.Pq Ql \e0### ;
hexadecimal sequences
.Ql \ex##
are limited to none up to two hexadecimal digits
.Dq # ;
both octal and hexadecimal sequences convert to raw octets;
.Ql \e# ,
where # is none of the above, translates to \e# (backslashes are retained).
.Pp
Backslash expansion in the C style mode slightly differs: octal sequences
.Ql \e###
must have no digit zero prefixing the one up to three octal digits
.Dq #
and yield raw octets; hexadecimal sequences
.Ql \ex#*
greedily eat up as many hexadecimal digits
.Dq #
as they can and terminate with the first non-hexadecimal digit;
these translate a Unicode codepoint to UTF-8.
The sequence
.Ql \ec# ,
where
.Dq #
is any octet, translates to Ctrl-# (which basically means,
.Ql \ec\*(TI
becomes DEL, everything else is bitwise ANDed with 0x1F).
Finally,
.Ql \e# ,
where # is none of the above, translates to # (has the backslash trimmed),
even if it is a newline.
.Ss Aliases .Ss Aliases
There are two types of aliases: normal command aliases and tracked aliases. There are two types of aliases: normal command aliases and tracked aliases.
Command aliases are normally used as a short hand for a long or often used Command aliases are normally used as a short hand for a long or often used
@ -1431,7 +1478,7 @@ Note that
may need to be escaped as an extended globbing pattern may need to be escaped as an extended globbing pattern
.Pq @(...) , .Pq @(...) ,
with single quotes with single quotes
.Pq \&'...\&' .Pq \&\*(aq...\&\*(aq
or double quotes or double quotes
.Pq \&"...\&" . .Pq \&"...\&" .
.Pp .Pp
@ -1706,7 +1753,7 @@ is (so they know how far it is to the edge of the screen), escape codes in
the prompt tend to mess things up. the prompt tend to mess things up.
You can tell the shell not to count certain You can tell the shell not to count certain
sequences (such as escape codes) by prefixing your prompt with a sequences (such as escape codes) by prefixing your prompt with a
character (such as control-A) followed by a carriage return and then delimiting character (such as Ctrl-A) followed by a carriage return and then delimiting
the escape codes with this character. the escape codes with this character.
Any occurrences of that character in the prompt are not printed. Any occurrences of that character in the prompt are not printed.
By the way, don't blame me for By the way, don't blame me for
@ -2773,12 +2820,12 @@ two prefices and the control character is ignored, any
other trailing character will be processed afterwards. other trailing character will be processed afterwards.
.Pp .Pp
Control characters may be written using caret notation Control characters may be written using caret notation
i.e. \*(haX represents Control-X. i.e. \*(haX represents Ctrl-X.
Note that although only two prefix characters (usually ESC and \*(haX) Note that although only two prefix characters (usually ESC and \*(haX)
are supported, some multi-character sequences can be supported. are supported, some multi-character sequences can be supported.
.Pp .Pp
The following default bindings show how the arrow keys, the home, end and The following default bindings show how the arrow keys, the home, end and
delete key on a BSD wsvt25, xterm-xfree86 or GNU screen terminal are bound delete key on a BSD wsvt25, xterm\-xfree86 or GNU screen terminal are bound
(of course some escape sequences won't work out quite this nicely): (of course some escape sequences won't work out quite this nicely):
.Bd -literal -offset indent .Bd -literal -offset indent
bind \*(aq\*(haX\*(aq=prefix\-2 bind \*(aq\*(haX\*(aq=prefix\-2
@ -3306,18 +3353,14 @@ The
.Fl n .Fl n
option suppresses the newline. option suppresses the newline.
By default, certain C escapes are translated. By default, certain C escapes are translated.
These include at least these mentioned in These include these mentioned in
.Sx Backslash expansion .Sx Backslash expansion
above, as well as above, as well as
.Ql \ec , .Ql \ec ,
which is equivalent to using the which is equivalent to using the
.Fl n .Fl n
option. option.
Contrary to C escapes, the backslash is retained if followed by an Backslash expansion may be inhibited with the
unsupported escape character; octal sequences must be preceded by a
.Sq 0 .
.Ql \e
expansion may be inhibited with the
.Fl r .Fl r
option. option.
The The
@ -5774,6 +5817,14 @@ The developer of mksh recognises the efforts of the pdksh authors,
who had dedicated their work into Public Domain, our users, and who had dedicated their work into Public Domain, our users, and
all contributors, such as the Debian and OpenBSD projects. all contributors, such as the Debian and OpenBSD projects.
See the documentation, CVS, and web site for details. See the documentation, CVS, and web site for details.
.Sh CAVEATS
.Nm
only supports the Unicode BMP (Basic Multilingual Plane).
Pipelines are executed in subshells.
It has a different scope model from
.At
.Nm ksh ,
which leads to subtle differences in semantics for identical builtins.
.Sh BUGS .Sh BUGS
This document attempts to describe This document attempts to describe
.Nm mksh\ R39+devel .Nm mksh\ R39+devel

10
sh.h
View File

@ -134,9 +134,9 @@
#endif #endif
#ifdef EXTERN #ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.342 2009/09/19 19:08:48 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/sh.h,v 1.343 2009/09/19 21:54:46 tg Exp $");
#endif #endif
#define MKSH_VERSION "R39 2009/09/07" #define MKSH_VERSION "R39 2009/09/19"
#ifndef MKSH_INCLUDES_ONLY #ifndef MKSH_INCLUDES_ONLY
@ -1339,6 +1339,10 @@ typedef union {
#define HERES 10 /* max << in line */ #define HERES 10 /* max << in line */
/*
 * Control character helpers (ASCII only).
 *
 * CTRL(x) maps '?' to DEL (0x7F) and any other value to its low five
 * bits, e.g. CTRL('A') == 0x01. Any earlier definition of CTRL is
 * dropped first. Note that x is evaluated twice, so do not pass an
 * expression with side effects.
 *
 * UNCTRL(x) toggles bit 0x40, e.g. 0x01 becomes 'A' and 0x7F becomes '?'.
 */
#undef CTRL
#define CTRL(x) ((x) == '?' ? 0x7F : (x) & 0x1F) /* ASCII */
#define UNCTRL(x) ((x) ^ 0x40) /* ASCII */
EXTERN Source *source; /* yyparse/yylex source */ EXTERN Source *source; /* yyparse/yylex source */
EXTERN YYSTYPE yylval; /* result from yylex */ EXTERN YYSTYPE yylval; /* result from yylex */
EXTERN struct ioword *heres [HERES], **herep; EXTERN struct ioword *heres [HERES], **herep;
@ -1587,7 +1591,7 @@ void set_current_wd(char *);
char *strdup_(const char *, Area *); char *strdup_(const char *, Area *);
char *strndup_(const char *, size_t, Area *); char *strndup_(const char *, size_t, Area *);
#endif #endif
int unbksl(const char **, bool); int unbksl(bool, int (*)(void), void (*)(int));
/* shf.c */ /* shf.c */
struct shf *shf_open(const char *, int, int, int); struct shf *shf_open(const char *, int, int, int);
struct shf *shf_fdopen(int, int, struct shf *); struct shf *shf_fdopen(int, int, struct shf *);