294 lines
4.7 KiB
C
294 lines
4.7 KiB
C
/*
|
|
* This file is part of the UCB release of Plan 9. It is subject to the license
|
|
* terms in the LICENSE file found in the top-level directory of this
|
|
* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
|
|
* part of the UCB release of Plan 9, including this file, may be copied,
|
|
* modified, propagated, or distributed except according to the terms contained
|
|
* in the LICENSE file.
|
|
*/
|
|
|
|
#include "grep.h"
|
|
|
|
/*
|
|
* incremental compiler.
|
|
* add the branch c to the
|
|
* state s.
|
|
*/
|
|
void
|
|
increment(State * s, int c)
|
|
{
|
|
int i;
|
|
State *t, **tt;
|
|
Re *re1, *re2;
|
|
|
|
nfollow = 0;
|
|
gen++;
|
|
matched = 0;
|
|
for (i = 0; i < s->count; i++)
|
|
fol1(s->re[i], c);
|
|
qsort(follow, nfollow, sizeof(*follow), fcmp);
|
|
for (tt = &state0; t = *tt;) {
|
|
if (t->count > nfollow) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if (t->count < nfollow) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
for (i = 0; i < nfollow; i++) {
|
|
re1 = t->re[i];
|
|
re2 = follow[i];
|
|
if (re1 > re2) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if (re1 < re2) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
}
|
|
if (! !matched && !t->match) {
|
|
tt = &t->linkleft;
|
|
goto cont;
|
|
}
|
|
if (!matched && ! !t->match) {
|
|
tt = &t->linkright;
|
|
goto cont;
|
|
}
|
|
s->next[c] = t;
|
|
return;
|
|
cont: ;
|
|
}
|
|
|
|
t = sal(nfollow);
|
|
*tt = t;
|
|
for (i = 0; i < nfollow; i++) {
|
|
re1 = follow[i];
|
|
t->re[i] = re1;
|
|
}
|
|
s->next[c] = t;
|
|
t->match = matched;
|
|
}
|
|
|
|
int
|
|
fcmp(const void *va, const void *vb)
|
|
{
|
|
Re **aa, **bb;
|
|
Re *a, *b;
|
|
|
|
aa = (Re **)va;
|
|
bb = (Re **)vb;
|
|
a = *aa;
|
|
b = *bb;
|
|
if (a > b)
|
|
return 1;
|
|
if (a < b)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
fol1(Re * r, int c)
|
|
{
|
|
Re *r1;
|
|
|
|
loop:
|
|
if (r->gen == gen)
|
|
return;
|
|
if (nfollow >= maxfollow)
|
|
error("nfollow");
|
|
r->gen = gen;
|
|
switch (r->type) {
|
|
default:
|
|
error("fol1");
|
|
|
|
case Tcase:
|
|
if (c >= 0 && c < 256)
|
|
if (r1 = r->cases[c])
|
|
follow[nfollow++] = r1;
|
|
if (r = r->next)
|
|
goto loop;
|
|
break;
|
|
|
|
case Talt:
|
|
case Tor:
|
|
fol1(r->alt, c);
|
|
r = r->next;
|
|
goto loop;
|
|
|
|
case Tbegin:
|
|
if (c == '\n' || c == Cbegin)
|
|
follow[nfollow++] = r->next;
|
|
break;
|
|
|
|
case Tend:
|
|
if (c == '\n') {
|
|
if (r->next == 0) {
|
|
matched = 1;
|
|
break;
|
|
}
|
|
r = r->next;
|
|
goto loop;
|
|
}
|
|
break;
|
|
|
|
case Tclass:
|
|
if (c >= r->lo && c <= r->hi)
|
|
follow[nfollow++] = r->next;
|
|
break;
|
|
}
|
|
}
|
|
|
|
Rune tab1[] = {
|
|
0x007f,
|
|
0x07ff,
|
|
};
|
|
|
|
Rune tab2[] = {
|
|
0x003f,
|
|
0x0fff,
|
|
};
|
|
|
|
Re2
|
|
rclass(Rune p0, Rune p1)
|
|
{
|
|
char xc0[6], xc1[6];
|
|
int i, n, m;
|
|
Re2 x;
|
|
|
|
if (p0 > p1)
|
|
return re2char(0xff, 0xff); // no match
|
|
|
|
/*
|
|
* bust range into same length
|
|
* character sequences
|
|
*/
|
|
for (i = 0; i < nelem(tab1); i++) {
|
|
m = tab1[i];
|
|
if (p0 <= m && p1 > m)
|
|
return re2or(rclass(p0, m), rclass(m + 1, p1));
|
|
}
|
|
|
|
/*
|
|
* bust range into part of a single page
|
|
* or into full pages
|
|
*/
|
|
for (i = 0; i < nelem(tab2); i++) {
|
|
m = tab2[i];
|
|
if ((p0 & ~m) != (p1 & ~m)) {
|
|
if ((p0 & m) != 0)
|
|
return re2or(rclass(p0, p0 | m), rclass((p0 | m) + 1, p1));
|
|
if ((p1 & m) != m)
|
|
return re2or(rclass(p0, (p1 & ~m) - 1), rclass(p1 & ~m, p1));
|
|
}
|
|
}
|
|
|
|
n = runetochar(xc0, &p0);
|
|
i = runetochar(xc1, &p1);
|
|
if (i != n)
|
|
error("length");
|
|
|
|
x = re2char(xc0[0], xc1[0]);
|
|
for (i = 1; i < n; i++)
|
|
x = re2cat(x, re2char(xc0[i], xc1[i]));
|
|
return x;
|
|
}
|
|
|
|
int
|
|
pcmp(const void *va, const void *vb)
|
|
{
|
|
int n;
|
|
const Rune *a, *b;
|
|
|
|
a = va;
|
|
b = vb;
|
|
|
|
n = a[0] - b[0];
|
|
if (n)
|
|
return n;
|
|
return a[1] - b[1];
|
|
}
|
|
|
|
/*
|
|
* convert character chass into
|
|
* run-pair ranges of matches.
|
|
* this is 10646/utf specific and
|
|
* needs to be changed for some
|
|
* other input character set.
|
|
* this is the key to a fast
|
|
* regular search of characters
|
|
* by looking at sequential bytes.
|
|
*/
|
|
Re2
|
|
re2class(char *s)
|
|
{
|
|
Rune pairs[200 + 2], *p, *q, ov;
|
|
int nc;
|
|
Re2 x;
|
|
|
|
nc = 0;
|
|
if (*s == '^') {
|
|
nc = 1;
|
|
s++;
|
|
}
|
|
|
|
p = pairs;
|
|
s += chartorune(p, s);
|
|
for (;;) {
|
|
if (*p == '\\')
|
|
s += chartorune(p, s);
|
|
if (*p == 0)
|
|
break;
|
|
p[1] = *p;
|
|
p += 2;
|
|
if (p >= pairs + nelem(pairs) - 2)
|
|
error("class too big");
|
|
s += chartorune(p, s);
|
|
if (*p != '-')
|
|
continue;
|
|
s += chartorune(p, s);
|
|
if (*p == '\\')
|
|
s += chartorune(p, s);
|
|
if (*p == 0)
|
|
break;
|
|
p[-1] = *p;
|
|
s += chartorune(p, s);
|
|
}
|
|
*p = 0;
|
|
qsort(pairs, (p - pairs) / 2, 2 * sizeof(*pairs), pcmp);
|
|
|
|
q = pairs;
|
|
for (p = pairs + 2; *p; p += 2) {
|
|
if (p[0] > p[1])
|
|
continue;
|
|
if (p[0] > q[1] || p[1] < q[0]) {
|
|
q[2] = p[0];
|
|
q[3] = p[1];
|
|
q += 2;
|
|
continue;
|
|
}
|
|
if (p[0] < q[0])
|
|
q[0] = p[0];
|
|
if (p[1] > q[1])
|
|
q[1] = p[1];
|
|
}
|
|
q[2] = 0;
|
|
|
|
p = pairs;
|
|
if (nc) {
|
|
x = rclass(0, p[0] - 1);
|
|
ov = p[1] + 1;
|
|
for (p += 2; *p; p += 2) {
|
|
x = re2or(x, rclass(ov, p[0] - 1));
|
|
ov = p[1] + 1;
|
|
}
|
|
x = re2or(x, rclass(ov, Runemask));
|
|
} else {
|
|
x = rclass(p[0], p[1]);
|
|
for (p += 2; *p; p += 2)
|
|
x = re2or(x, rclass(p[0], p[1]));
|
|
}
|
|
return x;
|
|
}
|