242 lines
5.3 KiB
C
242 lines
5.3 KiB
C
|
/*
|
||
|
* This file is part of the UCB release of Plan 9. It is subject to the license
|
||
|
* terms in the LICENSE file found in the top-level directory of this
|
||
|
* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
|
||
|
* part of the UCB release of Plan 9, including this file, may be copied,
|
||
|
* modified, propagated, or distributed except according to the terms contained
|
||
|
* in the LICENSE file.
|
||
|
*/
|
||
|
|
||
|
#include <u.h>
|
||
|
#include <libc.h>
|
||
|
#include "regexp.h"
|
||
|
#include "regcomp.h"
|
||
|
|
||
|
|
||
|
/*
|
||
|
* return 0 if no match
|
||
|
* >0 if a match
|
||
|
* <0 if we ran out of _relist space
|
||
|
*/
|
||
|
static int
|
||
|
regexec1(Reprog *progp, /* program to run */
|
||
|
char *bol, /* string to run machine on */
|
||
|
Resub *mp, /* subexpression elements */
|
||
|
int ms, /* number of elements at mp */
|
||
|
Reljunk *j
|
||
|
)
|
||
|
{
|
||
|
int flag=0;
|
||
|
Reinst *inst;
|
||
|
Relist *tlp;
|
||
|
char *s;
|
||
|
int i, checkstart;
|
||
|
Rune r, *rp, *ep;
|
||
|
int n;
|
||
|
Relist* tl; /* This list, next list */
|
||
|
Relist* nl;
|
||
|
Relist* tle; /* ends of this and next list */
|
||
|
Relist* nle;
|
||
|
int match;
|
||
|
char *p;
|
||
|
|
||
|
match = 0;
|
||
|
checkstart = j->starttype;
|
||
|
if(mp)
|
||
|
for(i=0; i<ms; i++) {
|
||
|
mp[i].sp = 0;
|
||
|
mp[i].ep = 0;
|
||
|
}
|
||
|
j->relist[0][0].inst = 0;
|
||
|
j->relist[1][0].inst = 0;
|
||
|
|
||
|
/* Execute machine once for each character, including terminal NUL */
|
||
|
s = j->starts;
|
||
|
do{
|
||
|
/* fast check for first char */
|
||
|
if(checkstart) {
|
||
|
switch(j->starttype) {
|
||
|
case RUNE:
|
||
|
p = utfrune(s, j->startchar);
|
||
|
if(p == 0 || s == j->eol)
|
||
|
return match;
|
||
|
s = p;
|
||
|
break;
|
||
|
case BOL:
|
||
|
if(s == bol)
|
||
|
break;
|
||
|
p = utfrune(s, '\n');
|
||
|
if(p == 0 || s == j->eol)
|
||
|
return match;
|
||
|
s = p+1;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
r = *(uint8_t*)s;
|
||
|
if(r < Runeself)
|
||
|
n = 1;
|
||
|
else
|
||
|
n = chartorune(&r, s);
|
||
|
|
||
|
/* switch run lists */
|
||
|
tl = j->relist[flag];
|
||
|
tle = j->reliste[flag];
|
||
|
nl = j->relist[flag^=1];
|
||
|
nle = j->reliste[flag];
|
||
|
nl->inst = 0;
|
||
|
|
||
|
/* Add first instruction to current list */
|
||
|
if(match == 0)
|
||
|
_renewemptythread(tl, progp->startinst, ms, s);
|
||
|
|
||
|
/* Execute machine until current list is empty */
|
||
|
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
|
||
|
for(inst = tlp->inst; ; inst = inst->next){
|
||
|
switch(inst->type){
|
||
|
case RUNE: /* regular character */
|
||
|
if(inst->r == r){
|
||
|
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||
|
return -1;
|
||
|
}
|
||
|
break;
|
||
|
case LBRA:
|
||
|
tlp->se.m[inst->subid].sp = s;
|
||
|
continue;
|
||
|
case RBRA:
|
||
|
tlp->se.m[inst->subid].ep = s;
|
||
|
continue;
|
||
|
case ANY:
|
||
|
if(r != '\n')
|
||
|
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||
|
return -1;
|
||
|
break;
|
||
|
case ANYNL:
|
||
|
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||
|
return -1;
|
||
|
break;
|
||
|
case BOL:
|
||
|
if(s == bol || *(s-1) == '\n')
|
||
|
continue;
|
||
|
break;
|
||
|
case EOL:
|
||
|
if(s == j->eol || r == 0 || r == '\n')
|
||
|
continue;
|
||
|
break;
|
||
|
case CCLASS:
|
||
|
ep = inst->cp->end;
|
||
|
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||
|
if(r >= rp[0] && r <= rp[1]){
|
||
|
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||
|
return -1;
|
||
|
break;
|
||
|
}
|
||
|
break;
|
||
|
case NCCLASS:
|
||
|
ep = inst->cp->end;
|
||
|
for(rp = inst->cp->spans; rp < ep; rp += 2)
|
||
|
if(r >= rp[0] && r <= rp[1])
|
||
|
break;
|
||
|
if(rp == ep)
|
||
|
if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
|
||
|
return -1;
|
||
|
break;
|
||
|
case OR:
|
||
|
/* evaluate right choice later */
|
||
|
if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
|
||
|
return -1;
|
||
|
/* efficiency: advance and re-evaluate */
|
||
|
continue;
|
||
|
case END: /* Match! */
|
||
|
match = 1;
|
||
|
tlp->se.m[0].ep = s;
|
||
|
if(mp != 0)
|
||
|
_renewmatch(mp, ms, &tlp->se);
|
||
|
break;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if(s == j->eol)
|
||
|
break;
|
||
|
checkstart = j->starttype && nl->inst==0;
|
||
|
s += n;
|
||
|
}while(r);
|
||
|
return match;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
regexec2(Reprog *progp, /* program to run */
|
||
|
char *bol, /* string to run machine on */
|
||
|
Resub *mp, /* subexpression elements */
|
||
|
int ms, /* number of elements at mp */
|
||
|
Reljunk *j
|
||
|
)
|
||
|
{
|
||
|
int rv;
|
||
|
Relist *relist0, *relist1;
|
||
|
|
||
|
/* mark space */
|
||
|
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||
|
if(relist0 == nil)
|
||
|
return -1;
|
||
|
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||
|
if(relist1 == nil){
|
||
|
free(relist1);
|
||
|
return -1;
|
||
|
}
|
||
|
j->relist[0] = relist0;
|
||
|
j->relist[1] = relist1;
|
||
|
j->reliste[0] = relist0 + BIGLISTSIZE - 2;
|
||
|
j->reliste[1] = relist1 + BIGLISTSIZE - 2;
|
||
|
|
||
|
rv = regexec1(progp, bol, mp, ms, j);
|
||
|
free(relist0);
|
||
|
free(relist1);
|
||
|
return rv;
|
||
|
}
|
||
|
|
||
|
extern int
|
||
|
regexec(Reprog *progp, /* program to run */
|
||
|
char *bol, /* string to run machine on */
|
||
|
Resub *mp, /* subexpression elements */
|
||
|
int ms) /* number of elements at mp */
|
||
|
{
|
||
|
Reljunk j;
|
||
|
Relist relist0[LISTSIZE], relist1[LISTSIZE];
|
||
|
int rv;
|
||
|
|
||
|
/*
|
||
|
* use user-specified starting/ending location if specified
|
||
|
*/
|
||
|
j.starts = bol;
|
||
|
j.eol = 0;
|
||
|
if(mp && ms>0){
|
||
|
if(mp->sp)
|
||
|
j.starts = mp->sp;
|
||
|
if(mp->ep)
|
||
|
j.eol = mp->ep;
|
||
|
}
|
||
|
j.starttype = 0;
|
||
|
j.startchar = 0;
|
||
|
if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
|
||
|
j.starttype = RUNE;
|
||
|
j.startchar = progp->startinst->r;
|
||
|
}
|
||
|
if(progp->startinst->type == BOL)
|
||
|
j.starttype = BOL;
|
||
|
|
||
|
/* mark space */
|
||
|
j.relist[0] = relist0;
|
||
|
j.relist[1] = relist1;
|
||
|
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||
|
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||
|
|
||
|
rv = regexec1(progp, bol, mp, ms, &j);
|
||
|
if(rv >= 0)
|
||
|
return rv;
|
||
|
rv = regexec2(progp, bol, mp, ms, &j);
|
||
|
if(rv >= 0)
|
||
|
return rv;
|
||
|
return -1;
|
||
|
}
|