From 01b54f1fd55345fc9d1b6b837d23a0900f830b84 Mon Sep 17 00:00:00 2001 From: tg Date: Wed, 27 Feb 2008 01:00:10 +0000 Subject: [PATCH] =?UTF-8?q?I=20=5Fthink=5F=20this=20implements=20${foo/bar?= =?UTF-8?q?/baz}=20logic=20(bar=20is=20a=20glob=20pattern)=20todo=20tomorr?= =?UTF-8?q?ow:=20=E2=80=A2=20test=20case=20(compare=20with=20e.g.=20GNU=20?= =?UTF-8?q?bash)=20=E2=80=A2=20manpage=20=E2=80=A2=20version=20bump?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sqchar is a bit ugly, but \/ must be preserved, as we don’t get wdencoded strings later on in the process (eval.c CSUBST) and I didn’t want to have an implementation like ${foo: 2: 3} this time --- eval.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- lex.c | 4 +-- misc.c | 8 ++--- sh.h | 7 +++-- 4 files changed, 97 insertions(+), 14 deletions(-) diff --git a/eval.c b/eval.c index ccc98a4..23f736b 100644 --- a/eval.c +++ b/eval.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.36 2007/10/25 15:34:29 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.37 2008/02/27 01:00:09 tg Exp $"); #ifdef MKSH_SMALL #define MKSH_NOPWNAM @@ -166,6 +166,7 @@ expand(const char *cp, /* input word */ int newlines = 0; /* For trailing newlines in COMSUB */ int saw_eq, tilde_ok; int make_magic; + int sqchar = 0; /* char to keep bksl before (${…/…\/…/…}) */ size_t len; if (cp == NULL) @@ -373,6 +374,13 @@ expand(const char *cp, /* input word */ *dp++ = MAGIC; *dp++ = (char)('@' | 0x80); break; + case '/': + /* ! DOBLANK,DOBRACE_,DOTILDE */ + f = DOPAT | (f&DONTRUNCOMMAND) | + DOTEMP_; + quote = 0; + sqchar = '/'; + break; case '=': /* Enabling tilde expansion * after :s here is @@ -416,6 +424,7 @@ expand(const char *cp, /* input word */ tilde_ok = 0; /* in case of ${unset:-} */ *dp = '\0'; quote = st->quote; + sqchar = 0; f = st->f; if (f&DOBLANK) doblank--; @@ -423,7 +432,10 @@ expand(const char *cp, /* input word */ case '#': case '%': /* Append end-pattern */ - *dp++ = MAGIC; *dp++ = ')'; *dp = '\0'; + *dp++ = MAGIC; *dp++ = ')'; + /* FALLTHROUGH */ + case '/': + *dp = '\0'; dp = Xrestpos(ds, dp, st->base); /* Must use st->var since calling * global would break things @@ -588,6 +600,9 @@ expand(const char *cp, /* input word */ break; } + if (quote && sqchar == c) + *dp++ = '\\'; + /* check for end of word or IFS separation */ if (c == 0 || (!quote && (f & DOBLANK) && doblank && !make_magic && ctype(c, C_IFS))) { @@ -823,6 +838,7 @@ varsub(Expand *xp, const char *sp, const char *word, switch (stype & 0x7f) { case '=': /* can't assign to a vector */ case '%': /* can't trim a vector (yet) */ + case '/': case '#': return -1; } @@ -845,6 +861,7 @@ varsub(Expand *xp, const char *sp, const char *word, case '=': /* can't assign to a vector */ case '%': /* can't trim a vector (yet) */ case '#': + case '/': case '?': return -1; } @@ -951,7 +968,7 @@ trimsub(char *str, char *pat, int how) char *end = strnul(str); char *p, c; - switch (how&0xff) { /* UCHAR_MAX maybe? */ + switch (how & 0xFF) { case '#': /* shortest at beginning */ for (p = str; p <= end; p++) { c = *p; *p = '\0'; @@ -962,7 +979,7 @@ trimsub(char *str, char *pat, int how) *p = c; } break; - case '#'|0x80: /* longest match at beginning */ + case '#'|0x80: /* longest match at beginning */ for (p = end; p >= str; p--) { c = *p; *p = '\0'; if (gmatchx(str, pat, false)) { @@ -978,12 +995,77 @@ trimsub(char *str, char *pat, int how) return str_nsave(str, p - str, ATEMP); } break; - case '%'|0x80: /* longest match at end */ + case '%'|0x80: /* longest match at end */ for (p = str; p <= end; p++) { if (gmatchx(p, pat, false)) return str_nsave(str, p - str, ATEMP); } break; + case '/': /* replace once - SLOW! */ + case '/'|0x80: /* replace all - SLOWER! */ + { + char *rpat, *rrep, *tpat1, *tpat2, *sbeg, *s, *d; + bool gotmatch = false; + + sbeg = s = str; + /* separate search pattern and replacement string */ + p = d = rpat = str_save(pat, ATEMP); + while (*p) + if (*p == '\\') { + p++; + if (*p) + p++; + } else if (*p == '/') { + *p++ = '\0'; + d = p; + gotmatch = true; + break; + } else + p++; + rrep = gotmatch ? d : null; + + /* first see if we have any match at all */ + tpat1 = shf_smprintf("%c%c%c*%s%c*%c)", MAGIC, '@' | 0x80, + MAGIC, rpat, MAGIC, MAGIC); + tpat2 = shf_smprintf("%c%c%s%c*%c)", MAGIC, '@' | 0x80, + rpat, MAGIC, MAGIC); + again_repl: + /* this would not be necessary if gmatchx would return + * the start and end values of a match found, like re* + */ + if (!gmatchx(s, tpat1, false)) + goto end_repl; + /* now anchor the beginning of the match */ + while (sbeg <= end) + if (gmatchx(sbeg, tpat2, false)) + break; + else + sbeg++; + /* now anchor the end of the match */ + for (p = end; p >= sbeg; p--) { + c = *p; *p = '\0'; + gotmatch = gmatchx(sbeg, rpat, false); + *p = c; + if (gotmatch) + break; + } + end = str_nsave(s, sbeg - s, ATEMP); + d = shf_smprintf("%s%s%s", end, rrep, p); + afree(end, ATEMP); + sbeg = d + (sbeg - s) + strlen(rrep); + if (s != str) + afree(s, ATEMP); + s = d; + end = strnul(s); + if ((how & 0xFF) != '/') + goto again_repl; + end_repl: + afree(rpat, ATEMP); + afree(tpat1, ATEMP); + afree(tpat2, ATEMP); + return (s); + break; + } } return str; /* no match, return string */ diff --git a/lex.c b/lex.c index 0ec8763..800f9a9 100644 --- a/lex.c +++ b/lex.c @@ -2,7 +2,7 @@ #include "sh.h" -__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.52 2008/02/26 21:08:33 tg Exp $"); +__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.53 2008/02/27 01:00:09 tg Exp $"); /* * states while lexing word @@ -395,7 +395,7 @@ yylex(int cf) /* If this is a trim operation, * treat (,|,) specially in STBRACE. */ - if (c == '#' || c == '%') { + if (ctype(c, C_SUBOP2)) { ungetsc(c); PUSH_STATE(STBRACE); } else { diff --git a/misc.c b/misc.c index 6ccc981..34d59c3 100644 --- a/misc.c +++ b/misc.c @@ -6,7 +6,7 @@ #include #endif -__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.67 2007/10/25 15:19:16 tg Exp $\t" +__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.68 2008/02/27 01:00:09 tg Exp $\t" MKSH_SH_H_ID); #undef USE_CHVT @@ -38,12 +38,12 @@ setctypes(const char *s, int t) unsigned i; if (t & C_IFS) { - for (i = 0; i < UCHAR_MAX+1; i++) + for (i = 0; i < UCHAR_MAX + 1; i++) chtypes[i] &= ~C_IFS; chtypes[0] |= C_IFS; /* include \0 in C_IFS */ } while (*s != 0) - chtypes[(unsigned char) *s++] |= t; + chtypes[(unsigned char)*s++] |= t; } void @@ -499,7 +499,7 @@ bi_getn(const char *as, int *ai) */ int -gmatchx(const char *s, const char *p, int isfile) +gmatchx(const char *s, const char *p, bool isfile) { const char *se, *pe; diff --git a/sh.h b/sh.h index 63fcdcf..4c1dd3a 100644 --- a/sh.h +++ b/sh.h @@ -8,7 +8,7 @@ /* $OpenBSD: c_test.h,v 1.4 2004/12/20 11:34:26 otto Exp $ */ /* $OpenBSD: tty.h,v 1.5 2004/12/20 11:34:26 otto Exp $ */ -#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.187 2008/02/26 20:43:11 tg Exp $" +#define MKSH_SH_H_ID "$MirOS: src/bin/mksh/sh.h,v 1.188 2008/02/27 01:00:10 tg Exp $" #define MKSH_VERSION "R33 2008/02/26" #if HAVE_SYS_PARAM_H @@ -543,7 +543,8 @@ EXTERN int really_exit; extern unsigned char chtypes[]; #define ctype(c, t) !!( ((t) == C_SUBOP2) ? \ - (((c) == '#' || (c) == '%') ? 1 : 0) : \ + (((c) == '#' || (c) == '%' || \ + (c) == '/') ? 1 : 0) : \ (chtypes[(unsigned char)(c)]&(t)) ) #define ksh_isalphx(c) ctype((c), C_ALPHA) #define ksh_isalnux(c) ctype((c), C_ALPHA | C_DIGIT) @@ -1394,7 +1395,7 @@ void change_flag(enum sh_flag, int, char); int parse_args(const char **, int, int *); int getn(const char *, int *); int bi_getn(const char *, int *); -int gmatchx(const char *, const char *, int); +int gmatchx(const char *, const char *, bool); int has_globbing(const char *, const char *); const unsigned char *pat_scan(const unsigned char *, const unsigned char *, int); int xstrcmp(const void *, const void *);