new function unbksl doing "backslash expansion" independent of c_print();

also make a separate subsection about it in the manpage
This commit is contained in:
tg 2009-09-19 15:16:05 +00:00
parent 3639137e48
commit e0f000fb83
4 changed files with 157 additions and 108 deletions

109
funcs.c
View File

@ -25,7 +25,7 @@
#include "sh.h"
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.130 2009/09/07 17:24:48 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.131 2009/09/19 15:16:02 tg Exp $");
#if HAVE_KILLPG
/*
@ -604,93 +604,30 @@ c_print(const char **wp)
while ((c = *s++) != '\0') {
Xcheck(xs, xp);
if ((flags & PO_EXPAND) && c == '\\') {
int i;
if ((c = unbksl(&s)) == -1) {
/* rejected by generic function */
switch ((c = *s++)) {
case 'c':
flags &= ~PO_NL;
/* AT&T brain damage */
continue;
case '\0':
s--;
c = '\\';
break;
default:
Xput(xs, xp, '\\');
}
} else if (c > 0xFF) {
/* generic function returned Unicode */
char ts[4];
switch ((c = *s++)) {
/* Oddly enough, \007 seems more portable than
* \a (due to HP-UX cc, Ultrix cc, old PCCs,
* etc.).
*/
case 'a': c = '\007'; break;
case 'b': c = '\b'; break;
case 'c':
flags &= ~PO_NL;
/* AT&T brain damage */
c = utf_wctomb(ts, c - 0x100);
ts[c] = 0;
for (c = 0; ts[c]; ++c)
Xput(xs, xp, ts[c]);
continue;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = 0x0B; break;
case '0':
/* Look for an octal number: can have
* three digits (not counting the
* leading 0). Truly burnt.
*/
c = 0;
for (i = 0; i < 3; i++) {
if (*s >= '0' && *s <= '7')
c = c*8 + *s++ - '0';
else
break;
}
break;
case 'x':
/* Look for a hexadecimal number of
* up to 2 digits, write raw octet.
*/
c = 0;
for (i = 0; i < 2; i++) {
c <<= 4;
if (*s >= '0' && *s <= '9')
c += *s++ - '0';
else if (*s >= 'A' && *s <= 'F')
c += *s++ - 'A' + 10;
else if (*s >= 'a' && *s <= 'f')
c += *s++ - 'a' + 10;
else {
c >>= 4;
break;
}
}
break;
case 'u':
/* Look for a hexadecimal number of
* up to 4 digits, write Unicode.
*/
c = 0;
for (i = 0; i < 4; i++) {
c <<= 4;
if (*s >= '0' && *s <= '9')
c += *s++ - '0';
else if (*s >= 'A' && *s <= 'F')
c += *s++ - 'A' + 10;
else if (*s >= 'a' && *s <= 'f')
c += *s++ - 'a' + 10;
else {
c >>= 4;
break;
}
}
if (c < 0x80)
/* Xput below writes ASCII */;
else if (c < 0x0800) {
Xput(xs, xp, (c >> 6) | 0xC0);
c = 0x80 | (c & 0x3F);
/* leave 2nd octet to below */
} else {
Xput(xs, xp, (c >> 12) | 0xE0);
Xput(xs, xp,
((c >> 6) & 0x3F) | 0x80);
c = 0x80 | (c & 0x3F);
/* leave 3rd octet to below */
}
break;
case '\0': s--; c = '\\'; break;
case '\\': break;
default:
Xput(xs, xp, '\\');
}
}
}
Xput(xs, xp, c);
}

100
misc.c
View File

@ -29,7 +29,7 @@
#include <grp.h>
#endif
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.118 2009/08/30 13:30:07 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.119 2009/09/19 15:16:03 tg Exp $");
#undef USE_CHVT
/* XXX conditions correct? */
@ -1447,3 +1447,101 @@ getrusage(int what, struct rusage *ru)
return (0);
}
#endif
/*
 * Decode one backslash escape sequence.
 *
 * On entry, *sp points at the character immediately following the
 * backslash, i.e. (*sp)[-1] is assumed to have been the backslash.
 *
 * Return value:
 *  - [0;0xFF]	a single raw octet (C escapes, \0###, \x##, \\)
 *  - >= 0x100	a Unicode codepoint plus the marker offset 0x100
 *		(\u#### and \U########); subtract 0x100 to get it
 *  - -1	no escape recognised here, or the \U value cannot
 *		be represented in the return type
 *
 * On success, *sp is advanced to the first unprocessed character;
 * if -1 is returned, *sp is left unchanged.
 */
int
unbksl(const char **sp)
{
	const char *cp = (*sp);
	const char kind = *cp++;
	unsigned int acc;
	int wc, ndigits;

	switch (kind) {
	case 'a':
		/*
		 * according to the comments in pdksh, \007 seems
		 * to be more portable than \a (due to HP-UX cc,
		 * Ultrix cc, old pcc, etc.) so we avoid the escape
		 * sequence altogether in mksh and assume ASCII
		 */
		wc = 7;
		break;
	case 'b':
		wc = '\b';
		break;
	case 'f':
		wc = '\f';
		break;
	case 'n':
		wc = '\n';
		break;
	case 'r':
		wc = '\r';
		break;
	case 't':
		wc = '\t';
		break;
	case 'v':
		/* assume ASCII here as well */
		wc = 11;
		break;
	case '0':
		/*
		 * look for an octal number with up to three
		 * digits, not counting the leading zero;
		 * convert it to a raw octet
		 */
		wc = 0;
		ndigits = 3;
		while (ndigits-- && *cp >= '0' && *cp <= '7')
			wc = (wc << 3) + (*cp++ - '0');
		/*
		 * three octal digits reach 0777 (0x1FF); keep only
		 * the low eight bits so the result stays an octet
		 * and is never mistaken for a marked Unicode value
		 */
		wc &= 0xFF;
		break;
	case 'U':
	case 'u':
	case 'x':
		/*
		 * x: look for a hexadecimal number with up to
		 *    two digits; convert to raw octet
		 * u: look for a hexadecimal number with up to
		 *    four (U: eight) digits; convert to Unicode
		 */
		ndigits = kind == 'U' ? 8 : kind == 'u' ? 4 : 2;
		/*
		 * accumulate unsigned: eight hex digits need 32 bits,
		 * which would overflow a signed int while shifting
		 */
		acc = 0;
		while (ndigits--) {
			unsigned int digit;

			if (*cp >= '0' && *cp <= '9')
				digit = (unsigned int)(*cp - '0');
			else if (*cp >= 'A' && *cp <= 'F')
				digit = (unsigned int)(*cp - 'A') + 10U;
			else if (*cp >= 'a' && *cp <= 'f')
				digit = (unsigned int)(*cp - 'a') + 10U;
			else
				break;
			acc = (acc << 4) | digit;
			++cp;
		}
		if (kind != 'x') {
			/*
			 * Unicode marker; refuse values for which
			 * value + 0x100 does not fit into a positive
			 * (at least 32-bit) int, as they could wrap
			 * around and even collide with the -1 result
			 */
			if (acc > 0x7FFFFEFFU)
				return (-1);
			acc += 0x100U;
		}
		wc = (int)acc;
		break;
	case '\\':
		wc = '\\';
		break;
	default:
		return (-1);
	}
	(*sp) = cp;
	return (wc);
}

53
mksh.1
View File

@ -1,4 +1,4 @@
.\" $MirOS: src/bin/mksh/mksh.1,v 1.184 2009/09/07 17:24:49 tg Exp $
.\" $MirOS: src/bin/mksh/mksh.1,v 1.185 2009/09/19 15:16:04 tg Exp $
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
.\"-
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
@ -48,7 +48,7 @@
.el .xD \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8
..
.\"-
.Dd $Mdocdate: September 7 2009 $
.Dd $Mdocdate: September 19 2009 $
.Dt MKSH 1
.Os MirBSD
.Sh NAME
@ -868,6 +868,32 @@ the
and the newline are stripped; otherwise, both the
.Ql \e
and the character following are unchanged.
.Ss Backslash expansion
In places where backslashes are expanded, certain C and
.At
.Nm ksh
or GNU
.Nm bash
style escapes are translated.
These include
.Ql \ea ,
.Ql \eb ,
.Ql \ef ,
.Ql \en ,
.Ql \er ,
.Ql \et ,
.Ql \eU######## ,
.Ql \eu#### ,
.Ql \ev ,
.Ql \ex## ,
and
.Ql \e0### ;
.Ql #
is, in the case of \e0###, an octal digit, or, in the case of \ex##,
\eu#### or \eU########, a hexadecimal digit; there may be anywhere from
zero up to two (x), three (0), four (u), or eight (U) such digits.
The \ex## and \e0### escapes translate to raw 8-bit octets;
the \eu#### and \eU######## escapes translate a Unicode codepoint to UTF-8.
.Ss Aliases
There are two types of aliases: normal command aliases and tracked aliases.
Command aliases are normally used as a short hand for a long or often used
@ -3266,24 +3292,11 @@ The
.Fl n
option suppresses the newline.
By default, certain C escapes are translated.
These include
.Ql \eb ,
.Ql \ef ,
.Ql \en ,
.Ql \er ,
.Ql \et ,
.Ql \eu#### ,
.Ql \ev ,
.Ql \ex## ,
and
.Ql \e0### ;
.Ql #
is, in the case of \e0###, an octal, or, in the case of \eu####
or \ex##, a hexadecimal digit, of which there may be 0 to 2/3/4.
The \ex## and \e0### escapes translate to raw 8-bit octets;
the \eu#### escape translates a Unicode codepoint to UTF-8.
.Ql \ec
is equivalent to using the
These include at least those mentioned in
.Sx Backslash expansion
above, as well as
.Ql \ec ,
which is equivalent to using the
.Fl n
option.
.Ql \e

3
sh.h
View File

@ -134,7 +134,7 @@
#endif
#ifdef EXTERN
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.340 2009/09/07 17:24:49 tg Exp $");
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.341 2009/09/19 15:16:05 tg Exp $");
#endif
#define MKSH_VERSION "R39 2009/09/07"
@ -1587,6 +1587,7 @@ void set_current_wd(char *);
char *strdup_(const char *, Area *);
char *strndup_(const char *, size_t, Area *);
#endif
int unbksl(const char **);
/* shf.c */
struct shf *shf_open(const char *, int, int, int);
struct shf *shf_fdopen(int, int, struct shf *);