/* * This file is part of the UCB release of Plan 9. It is subject to the license * terms in the LICENSE file found in the top-level directory of this * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No * part of the UCB release of Plan 9, including this file, may be copied, * modified, propagated, or distributed except according to the terms contained * in the LICENSE file. */ #include "grep.h" /* * incremental compiler. * add the branch c to the * state s. */ void increment(State * s, int c) { int i; State *t, **tt; Re *re1, *re2; nfollow = 0; gen++; matched = 0; for (i = 0; i < s->count; i++) fol1(s->re[i], c); qsort(follow, nfollow, sizeof(*follow), fcmp); for (tt = &state0; t = *tt;) { if (t->count > nfollow) { tt = &t->linkleft; goto cont; } if (t->count < nfollow) { tt = &t->linkright; goto cont; } for (i = 0; i < nfollow; i++) { re1 = t->re[i]; re2 = follow[i]; if (re1 > re2) { tt = &t->linkleft; goto cont; } if (re1 < re2) { tt = &t->linkright; goto cont; } } if (! !matched && !t->match) { tt = &t->linkleft; goto cont; } if (!matched && ! !t->match) { tt = &t->linkright; goto cont; } s->next[c] = t; return; cont: ; } t = sal(nfollow); *tt = t; for (i = 0; i < nfollow; i++) { re1 = follow[i]; t->re[i] = re1; } s->next[c] = t; t->match = matched; } int fcmp(const void *va, const void *vb) { Re **aa, **bb; Re *a, *b; aa = (Re **)va; bb = (Re **)vb; a = *aa; b = *bb; if (a > b) return 1; if (a < b) return -1; return 0; } void fol1(Re * r, int c) { Re *r1; loop: if (r->gen == gen) return; if (nfollow >= maxfollow) error("nfollow"); r->gen = gen; switch (r->type) { default: error("fol1"); case Tcase: if (c >= 0 && c < 256) if (r1 = r->cases[c]) follow[nfollow++] = r1; if (r = r->next) goto loop; break; case Talt: case Tor: fol1(r->alt, c); r = r->next; goto loop; case Tbegin: if (c == '\n' || c == Cbegin) follow[nfollow++] = r->next; break; case Tend: if (c == '\n') { if (r->next == 0) { matched = 1; break; } r = r->next; goto loop; } break; case Tclass: if (c >= r->lo && c <= r->hi) follow[nfollow++] = r->next; break; } } Rune tab1[] = { 0x007f, 0x07ff, }; Rune tab2[] = { 0x003f, 0x0fff, }; Re2 rclass(Rune p0, Rune p1) { char xc0[6], xc1[6]; int i, n, m; Re2 x; if (p0 > p1) return re2char(0xff, 0xff); // no match /* * bust range into same length * character sequences */ for (i = 0; i < nelem(tab1); i++) { m = tab1[i]; if (p0 <= m && p1 > m) return re2or(rclass(p0, m), rclass(m + 1, p1)); } /* * bust range into part of a single page * or into full pages */ for (i = 0; i < nelem(tab2); i++) { m = tab2[i]; if ((p0 & ~m) != (p1 & ~m)) { if ((p0 & m) != 0) return re2or(rclass(p0, p0 | m), rclass((p0 | m) + 1, p1)); if ((p1 & m) != m) return re2or(rclass(p0, (p1 & ~m) - 1), rclass(p1 & ~m, p1)); } } n = runetochar(xc0, &p0); i = runetochar(xc1, &p1); if (i != n) error("length"); x = re2char(xc0[0], xc1[0]); for (i = 1; i < n; i++) x = re2cat(x, re2char(xc0[i], xc1[i])); return x; } int pcmp(const void *va, const void *vb) { int n; const Rune *a, *b; a = va; b = vb; n = a[0] - b[0]; if (n) return n; return a[1] - b[1]; } /* * convert character chass into * run-pair ranges of matches. * this is 10646/utf specific and * needs to be changed for some * other input character set. * this is the key to a fast * regular search of characters * by looking at sequential bytes. */ Re2 re2class(char *s) { Rune pairs[200 + 2], *p, *q, ov; int nc; Re2 x; nc = 0; if (*s == '^') { nc = 1; s++; } p = pairs; s += chartorune(p, s); for (;;) { if (*p == '\\') s += chartorune(p, s); if (*p == 0) break; p[1] = *p; p += 2; if (p >= pairs + nelem(pairs) - 2) error("class too big"); s += chartorune(p, s); if (*p != '-') continue; s += chartorune(p, s); if (*p == '\\') s += chartorune(p, s); if (*p == 0) break; p[-1] = *p; s += chartorune(p, s); } *p = 0; qsort(pairs, (p - pairs) / 2, 2 * sizeof(*pairs), pcmp); q = pairs; for (p = pairs + 2; *p; p += 2) { if (p[0] > p[1]) continue; if (p[0] > q[1] || p[1] < q[0]) { q[2] = p[0]; q[3] = p[1]; q += 2; continue; } if (p[0] < q[0]) q[0] = p[0]; if (p[1] > q[1]) q[1] = p[1]; } q[2] = 0; p = pairs; if (nc) { x = rclass(0, p[0] - 1); ov = p[1] + 1; for (p += 2; *p; p += 2) { x = re2or(x, rclass(ov, p[0] - 1)); ov = p[1] + 1; } x = re2or(x, rclass(ov, Runemask)); } else { x = rclass(p[0], p[1]); for (p += 2; *p; p += 2) x = re2or(x, rclass(p[0], p[1])); } return x; }