new function unbksl doing "backslash expansion" independent of c_print();
also make a separate subsection about it in the manpage
This commit is contained in:
parent
3639137e48
commit
e0f000fb83
109
funcs.c
109
funcs.c
@ -25,7 +25,7 @@
|
||||
|
||||
#include "sh.h"
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.130 2009/09/07 17:24:48 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.131 2009/09/19 15:16:02 tg Exp $");
|
||||
|
||||
#if HAVE_KILLPG
|
||||
/*
|
||||
@ -604,93 +604,30 @@ c_print(const char **wp)
|
||||
while ((c = *s++) != '\0') {
|
||||
Xcheck(xs, xp);
|
||||
if ((flags & PO_EXPAND) && c == '\\') {
|
||||
int i;
|
||||
if ((c = unbksl(&s)) == -1) {
|
||||
/* rejected by generic function */
|
||||
switch ((c = *s++)) {
|
||||
case 'c':
|
||||
flags &= ~PO_NL;
|
||||
/* AT&T brain damage */
|
||||
continue;
|
||||
case '\0':
|
||||
s--;
|
||||
c = '\\';
|
||||
break;
|
||||
default:
|
||||
Xput(xs, xp, '\\');
|
||||
}
|
||||
} else if (c > 0xFF) {
|
||||
/* generic function returned Unicode */
|
||||
char ts[4];
|
||||
|
||||
switch ((c = *s++)) {
|
||||
/* Oddly enough, \007 seems more portable than
|
||||
* \a (due to HP-UX cc, Ultrix cc, old PCCs,
|
||||
* etc.).
|
||||
*/
|
||||
case 'a': c = '\007'; break;
|
||||
case 'b': c = '\b'; break;
|
||||
case 'c':
|
||||
flags &= ~PO_NL;
|
||||
/* AT&T brain damage */
|
||||
c = utf_wctomb(ts, c - 0x100);
|
||||
ts[c] = 0;
|
||||
for (c = 0; ts[c]; ++c)
|
||||
Xput(xs, xp, ts[c]);
|
||||
continue;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
case 'v': c = 0x0B; break;
|
||||
case '0':
|
||||
/* Look for an octal number: can have
|
||||
* three digits (not counting the
|
||||
* leading 0). Truly burnt.
|
||||
*/
|
||||
c = 0;
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (*s >= '0' && *s <= '7')
|
||||
c = c*8 + *s++ - '0';
|
||||
else
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'x':
|
||||
/* Look for a hexadecimal number of
|
||||
* up to 2 digits, write raw octet.
|
||||
*/
|
||||
c = 0;
|
||||
for (i = 0; i < 2; i++) {
|
||||
c <<= 4;
|
||||
if (*s >= '0' && *s <= '9')
|
||||
c += *s++ - '0';
|
||||
else if (*s >= 'A' && *s <= 'F')
|
||||
c += *s++ - 'A' + 10;
|
||||
else if (*s >= 'a' && *s <= 'f')
|
||||
c += *s++ - 'a' + 10;
|
||||
else {
|
||||
c >>= 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'u':
|
||||
/* Look for a hexadecimal number of
|
||||
* up to 4 digits, write Unicode.
|
||||
*/
|
||||
c = 0;
|
||||
for (i = 0; i < 4; i++) {
|
||||
c <<= 4;
|
||||
if (*s >= '0' && *s <= '9')
|
||||
c += *s++ - '0';
|
||||
else if (*s >= 'A' && *s <= 'F')
|
||||
c += *s++ - 'A' + 10;
|
||||
else if (*s >= 'a' && *s <= 'f')
|
||||
c += *s++ - 'a' + 10;
|
||||
else {
|
||||
c >>= 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (c < 0x80)
|
||||
/* Xput below writes ASCII */;
|
||||
else if (c < 0x0800) {
|
||||
Xput(xs, xp, (c >> 6) | 0xC0);
|
||||
c = 0x80 | (c & 0x3F);
|
||||
/* leave 2nd octet to below */
|
||||
} else {
|
||||
Xput(xs, xp, (c >> 12) | 0xE0);
|
||||
Xput(xs, xp,
|
||||
((c >> 6) & 0x3F) | 0x80);
|
||||
c = 0x80 | (c & 0x3F);
|
||||
/* leave 3rd octet to below */
|
||||
}
|
||||
break;
|
||||
case '\0': s--; c = '\\'; break;
|
||||
case '\\': break;
|
||||
default:
|
||||
Xput(xs, xp, '\\');
|
||||
}
|
||||
}
|
||||
}
|
||||
Xput(xs, xp, c);
|
||||
}
|
||||
|
100
misc.c
100
misc.c
@ -29,7 +29,7 @@
|
||||
#include <grp.h>
|
||||
#endif
|
||||
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.118 2009/08/30 13:30:07 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.119 2009/09/19 15:16:03 tg Exp $");
|
||||
|
||||
#undef USE_CHVT
|
||||
/* XXX conditions correct? */
|
||||
@ -1447,3 +1447,101 @@ getrusage(int what, struct rusage *ru)
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Decode one backslash escape sequence.
 *
 * On entry *sp points at the character following the backslash,
 * i.e. (*sp)[-1] is assumed to have been the backslash itself.
 *
 * Return value:
 *	0x00..0xFF	a raw octet (\a \b \f \n \r \t \v \\ \0### \x##)
 *	>= 0x100	a Unicode codepoint plus 0x100 (\u#### \U########)
 *	-1		the character at *sp does not start a known escape
 *
 * On success *sp is advanced to the first unprocessed character;
 * when -1 is returned *sp is left unchanged.
 *
 * Octal values above 0377 are reduced to their low eight bits so that
 * they can never be mistaken for a Unicode result. Values of \u/\U
 * above U+10FFFF are not valid codepoints; they are replaced by U+FFFD
 * so that the result always fits into a non-negative int and can never
 * collide with the -1 "not found" value.
 */
int
unbksl(const char **sp)
{
	const char *cp = (*sp);
	const char esc = *cp++;
	unsigned int acc;
	int wc, i;

	switch (esc) {
	case 'a':
		/*
		 * according to the comments in pdksh, \007 seems
		 * to be more portable than \a (due to HP-UX cc,
		 * Ultrix cc, old pcc, etc.) so we avoid the escape
		 * sequence altogether in mksh and assume ASCII
		 */
		wc = 7;
		break;
	case 'b':
		wc = '\b';
		break;
	case 'f':
		wc = '\f';
		break;
	case 'n':
		wc = '\n';
		break;
	case 'r':
		wc = '\r';
		break;
	case 't':
		wc = '\t';
		break;
	case 'v':
		/* assume ASCII here as well */
		wc = 11;
		break;
	case '0':
		/*
		 * look for an octal number with up to three
		 * digits, not counting the leading zero;
		 * convert it to a raw octet
		 */
		wc = 0;
		i = 3;
		while (i-- && *cp >= '0' && *cp <= '7')
			wc = (wc << 3) + (*cp++ - '0');
		/* three octal digits can reach 0777; keep it an octet */
		wc &= 0xFF;
		break;
	case 'U':
	case 'u':
	case 'x':
		/*
		 * x: look for a hexadecimal number with up to
		 *    two digits; convert to raw octet
		 * u: look for a hexadecimal number with up to
		 *    four (U: eight) digits; convert to Unicode
		 */
		i = (esc == 'U') ? 8 : (esc == 'u') ? 4 : 2;
		/* accumulate unsigned: eight digits may set the top bit */
		acc = 0;
		while (i--) {
			unsigned int digit;

			if (*cp >= '0' && *cp <= '9')
				digit = (unsigned int)(*cp - '0');
			else if (*cp >= 'A' && *cp <= 'F')
				digit = (unsigned int)(*cp - 'A') + 10U;
			else if (*cp >= 'a' && *cp <= 'f')
				digit = (unsigned int)(*cp - 'a') + 10U;
			else
				break;
			acc = (acc << 4) | digit;
			++cp;
		}
		if (esc == 'x') {
			/* at most two digits, so this is within 0..0xFF */
			wc = (int)acc;
		} else {
			if (acc > 0x10FFFFU)
				/* beyond Unicode: use replacement character */
				acc = 0xFFFDU;
			/* Unicode marker */
			wc = (int)acc + 0x100;
		}
		break;
	case '\\':
		wc = '\\';
		break;
	default:
		/* not an escape we know; caller decides what to do */
		return (-1);
	}

	(*sp) = cp;
	return (wc);
}
|
||||
|
53
mksh.1
53
mksh.1
@ -1,4 +1,4 @@
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.184 2009/09/07 17:24:49 tg Exp $
|
||||
.\" $MirOS: src/bin/mksh/mksh.1,v 1.185 2009/09/19 15:16:04 tg Exp $
|
||||
.\" $OpenBSD: ksh.1,v 1.129 2009/05/28 06:09:06 jmc Exp $
|
||||
.\"-
|
||||
.\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||
@ -48,7 +48,7 @@
|
||||
.el .xD \\$1 \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8
|
||||
..
|
||||
.\"-
|
||||
.Dd $Mdocdate: September 7 2009 $
|
||||
.Dd $Mdocdate: September 19 2009 $
|
||||
.Dt MKSH 1
|
||||
.Os MirBSD
|
||||
.Sh NAME
|
||||
@ -868,6 +868,32 @@ the
|
||||
and the newline are stripped; otherwise, both the
|
||||
.Ql \e
|
||||
and the character following are unchanged.
|
||||
.Ss Backslash expansion
|
||||
In places where backslashes are expanded, certain C and
|
||||
.At
|
||||
.Nm ksh
|
||||
or GNU
|
||||
.Nm bash
|
||||
style escapes are translated.
|
||||
These include
|
||||
.Ql \ea ,
|
||||
.Ql \eb ,
|
||||
.Ql \ef ,
|
||||
.Ql \en ,
|
||||
.Ql \er ,
|
||||
.Ql \et ,
|
||||
.Ql \eU######## ,
|
||||
.Ql \eu#### ,
|
||||
.Ql \ev ,
|
||||
.Ql \ex## ,
|
||||
and
|
||||
.Ql \e0### ;
|
||||
.Ql #
|
||||
is, in the case of \e0###, an octal, or, in the case of \ex##,
|
||||
\eu#### or \eU########, a hexadecimal digit, of which there may
|
||||
be none up to two (x), three (0), four (u), or eight (U).
|
||||
The \ex## and \e0### escapes translate to raw 8-bit octets;
|
||||
the \eu#### and \eU######## escapes translate a Unicode codepoint to UTF-8.
|
||||
.Ss Aliases
|
||||
There are two types of aliases: normal command aliases and tracked aliases.
|
||||
Command aliases are normally used as a short hand for a long or often used
|
||||
@ -3266,24 +3292,11 @@ The
|
||||
.Fl n
|
||||
option suppresses the newline.
|
||||
By default, certain C escapes are translated.
|
||||
These include
|
||||
.Ql \eb ,
|
||||
.Ql \ef ,
|
||||
.Ql \en ,
|
||||
.Ql \er ,
|
||||
.Ql \et ,
|
||||
.Ql \eu#### ,
|
||||
.Ql \ev ,
|
||||
.Ql \ex## ,
|
||||
and
|
||||
.Ql \e0### ;
|
||||
.Ql #
|
||||
is, in the case of \e0###, an octal, or, in the case of \eu####
|
||||
or \ex##, a hexadecimal digit, of which there may be 0 to 2/3/4.
|
||||
The \ex## and \e0### escapes translate to raw 8-bit octets;
|
||||
the \eu#### escape translates a Unicode codepoint to UTF-8.
|
||||
.Ql \ec
|
||||
is equivalent to using the
|
||||
These include at least those mentioned in
|
||||
.Sx Backslash expansion
|
||||
above, as well as
|
||||
.Ql \ec ,
|
||||
which is equivalent to using the
|
||||
.Fl n
|
||||
option.
|
||||
.Ql \e
|
||||
|
3
sh.h
3
sh.h
@ -134,7 +134,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef EXTERN
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.340 2009/09/07 17:24:49 tg Exp $");
|
||||
__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.341 2009/09/19 15:16:05 tg Exp $");
|
||||
#endif
|
||||
#define MKSH_VERSION "R39 2009/09/07"
|
||||
|
||||
@ -1587,6 +1587,7 @@ void set_current_wd(char *);
|
||||
char *strdup_(const char *, Area *);
|
||||
char *strndup_(const char *, size_t, Area *);
|
||||
#endif
|
||||
int unbksl(const char **);
|
||||
/* shf.c */
|
||||
struct shf *shf_open(const char *, int, int, int);
|
||||
struct shf *shf_fdopen(int, int, struct shf *);
|
||||
|
Loading…
Reference in New Issue
Block a user