* regex/engine.c (step): Drop Cygwin-specific definition.
(NONCHAR): Better cast here to make the test work. Move comment from step here. (matcher): Disable skipping initial string in multibyte case. * regex/regcomp.c (p_bracket): Don't simplify singleton in the invert case. (p_b_term): Handle early end of pattern after dash in bracket expression. (singleton): Don't ignore the wides just because there's already a singleton in the single byte chars. Fix condition for a singleton wide accordingly. (findmust): Check for LC_CTYPE charset, rather than LC_COLLATE charset. * regex2.h (CHIN): Fix condition in the icase & invert case. (ISWORD): Fix wrong cast to unsigned char.
This commit is contained in:
@ -106,11 +106,7 @@ static const char *dissect(struct match *m, const char *start, const char *stop,
|
||||
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
||||
static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
#ifdef __CYGWIN__
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
|
||||
#else
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
||||
#endif
|
||||
#define MAX_RECURSION 100
|
||||
#define BOL (OUT-1)
|
||||
#define EOL (BOL-1)
|
||||
@ -119,7 +115,10 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
|
||||
#define BOW (BOL-4)
|
||||
#define EOW (BOL-5)
|
||||
#define BADCHAR (BOL-6)
|
||||
#define NONCHAR(c) ((c) <= OUT)
|
||||
/* When using wint_t, which is defined as unsigned int on BSD,
|
||||
as well as on Cygwin or Linux, the NONCHAR test is broken without
|
||||
the below cast. I'm wondering how this is supposed to work at all... */
|
||||
#define NONCHAR(c) ((int)(c) <= OUT)
|
||||
#ifdef REDEBUG
|
||||
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
@ -248,9 +247,12 @@ matcher(struct re_guts *g,
|
||||
ZAPSTATE(&m->mbs);
|
||||
|
||||
/* Adjust start according to moffset, to speed things up */
|
||||
#ifndef MNAMES
|
||||
/* The code evaluating moffset doesn't seem to work right
|
||||
in the multibyte case. */
|
||||
if (g->moffset > -1)
|
||||
start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
|
||||
|
||||
#endif
|
||||
SP("mloop", m->st, *start);
|
||||
|
||||
/* this loop does only one repetition except for backrefs */
|
||||
@ -993,14 +995,7 @@ step(struct re_guts *g,
|
||||
sopno start, /* start state within strip */
|
||||
sopno stop, /* state after stop state within strip */
|
||||
states bef, /* states reachable before */
|
||||
#ifdef __CYGWIN__
|
||||
/* When using wint_t, which is defined as unsigned int on BSD,
|
||||
as well as on Cygwin or Linux, the NONCHAR test is broken.
|
||||
I'm wondering how this is supposed to work at all... */
|
||||
int ch, /* character or NONCHAR code */
|
||||
#else
|
||||
wint_t ch, /* character or NONCHAR code */
|
||||
#endif
|
||||
states aft) /* states already known reachable after */
|
||||
{
|
||||
cset *cs;
|
||||
|
Reference in New Issue
Block a user