diff --git a/winsup/cygwin/collate.h b/winsup/cygwin/collate.h index 3cfb8880c..fa8561fd5 100644 --- a/winsup/cygwin/collate.h +++ b/winsup/cygwin/collate.h @@ -1,6 +1,6 @@ /* collate.h: Internal BSD libc header, used in glob and regcomp, for instance. - Copyright 2012 Red Hat, Inc. + Copyright 2012, 2015 Red Hat, Inc. This file is part of Cygwin. @@ -13,10 +13,9 @@ details. */ extern "C" { #endif -/* We never have a collate load error. */ -const int __collate_load_error = 0; +extern const int __collate_load_error; -int __collate_range_cmp (int c1, int c2); +extern int __collate_range_cmp (int c1, int c2); #ifdef __cplusplus }; diff --git a/winsup/cygwin/globals.cc b/winsup/cygwin/globals.cc index 383b8f115..80e0f0db3 100644 --- a/winsup/cygwin/globals.cc +++ b/winsup/cygwin/globals.cc @@ -103,6 +103,9 @@ char NO_COPY almost_null[1]; extern "C" { +/* We never have a collate load error. */ +const int __collate_load_error = 0; + /* Heavily-used const UNICODE_STRINGs are defined here once. The idea is a speed improvement by not having to initialize a UNICODE_STRING every time we make a string comparison. The _RDATA trick allows defining the strings diff --git a/winsup/cygwin/libc/fnmatch.c b/winsup/cygwin/libc/fnmatch.c index 295f4c413..8c386787b 100644 --- a/winsup/cygwin/libc/fnmatch.c +++ b/winsup/cygwin/libc/fnmatch.c @@ -5,6 +5,11 @@ * This code is derived from software contributed to Berkeley by * Guido van Rossum. * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -34,9 +39,7 @@ static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; #endif /* LIBC_SCCS and not lint */ #include -#if 0 -__FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.18 2007/01/09 00:27:53 imp Exp $"); -#endif +__FBSDID("$FreeBSD: head/lib/libc/gen/fnmatch.c 288309 2015-09-27 12:52:18Z jilles $"); /* * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. @@ -60,7 +63,11 @@ __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.18 2007/01/09 00:27:53 imp Ex #include #include -#include "../posix/collate.h" +#ifdef __CYGWIN__ +#include "../collate.h" +#else +#include "collate.h" +#endif #define EOS '\0' @@ -69,31 +76,30 @@ __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.18 2007/01/09 00:27:53 imp Ex #define RANGE_ERROR (-1) static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); -static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t); +static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, + mbstate_t); int -fnmatch(pattern, string, flags) - const char *pattern, *string; - int flags; +fnmatch(const char *pattern, const char *string, int flags) { static const mbstate_t initial; - return (fnmatch1(pattern, string, flags, initial, initial)); + return (fnmatch1(pattern, string, string, flags, initial, initial)); } static int -fnmatch1(pattern, string, flags, patmbs, strmbs) - const char *pattern, *string; - int flags; - mbstate_t patmbs, strmbs; +fnmatch1(const char *pattern, const char *string, const char *stringstart, + int flags, mbstate_t patmbs, mbstate_t strmbs) { - const char *stringstart; + const char *bt_pattern, *bt_string; + mbstate_t bt_patmbs, bt_strmbs; char *newp; char c; wchar_t pc, sc; size_t pclen, sclen; - for (stringstart = string;;) { + bt_pattern = bt_string = NULL; + for (;;) { pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); if (pclen == (size_t)-1 || pclen == (size_t)-2) return (FNM_NOMATCH); @@ -108,16 +114,18 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) case EOS: if ((flags & FNM_LEADING_DIR) && sc == '/') return (0); - return (sc == EOS ? 0 : FNM_NOMATCH); + if (sc == EOS) + return (0); + goto backtrack; case '?': if (sc == EOS) return (FNM_NOMATCH); if (sc == '/' && (flags & FNM_PATHNAME)) - return (FNM_NOMATCH); + goto backtrack; if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; string += sclen; break; case '*': @@ -129,7 +137,7 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; /* Optimize for pattern with * at end or before /. */ if (c == EOS) @@ -145,33 +153,24 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) break; } - /* General case, use recursion. */ - while (sc != EOS) { - if (!fnmatch1(pattern, string, - flags & ~FNM_PERIOD, patmbs, strmbs)) - return (0); - sclen = mbrtowc(&sc, string, MB_LEN_MAX, - &strmbs); - if (sclen == (size_t)-1 || - sclen == (size_t)-2) { - sc = (unsigned char)*string; - sclen = 1; - memset(&strmbs, 0, sizeof(strmbs)); - } - if (sc == '/' && flags & FNM_PATHNAME) - break; - string += sclen; - } - return (FNM_NOMATCH); + /* + * First try the shortest match for the '*' that + * could work. We can forget any earlier '*' since + * there is no way having it match more characters + * can help us, given that we are already here. + */ + bt_pattern = pattern, bt_patmbs = patmbs; + bt_string = string, bt_strmbs = strmbs; + break; case '[': if (sc == EOS) return (FNM_NOMATCH); if (sc == '/' && (flags & FNM_PATHNAME)) - return (FNM_NOMATCH); + goto backtrack; if (sc == '.' && (flags & FNM_PERIOD) && (string == stringstart || ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) - return (FNM_NOMATCH); + goto backtrack; switch (rangematch(pattern, sc, flags, &newp, &patmbs)) { @@ -181,7 +180,7 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) pattern = newp; break; case RANGE_NOMATCH: - return (FNM_NOMATCH); + goto backtrack; } string += sclen; break; @@ -191,21 +190,44 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) &patmbs); if (pclen == (size_t)-1 || pclen == (size_t)-2) return (FNM_NOMATCH); - if (pclen == 0) - pc = '\\'; pattern += pclen; } /* FALLTHROUGH */ default: norm: + string += sclen; if (pc == sc) ; else if ((flags & FNM_CASEFOLD) && (towlower(pc) == towlower(sc))) ; - else - return (FNM_NOMATCH); - string += sclen; + else { + backtrack: + /* + * If we have a mismatch (other than hitting + * the end of the string), go back to the last + * '*' seen and have it match one additional + * character. + */ + if (bt_pattern == NULL) + return (FNM_NOMATCH); + sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, + &bt_strmbs); + if (sclen == (size_t)-1 || + sclen == (size_t)-2) { + sc = (unsigned char)*bt_string; + sclen = 1; + memset(&bt_strmbs, 0, + sizeof(bt_strmbs)); + } + if (sc == EOS) + return (FNM_NOMATCH); + if (sc == '/' && flags & FNM_PATHNAME) + return (FNM_NOMATCH); + bt_string += sclen; + pattern = bt_pattern, patmbs = bt_patmbs; + string = bt_string, strmbs = bt_strmbs; + } break; } } @@ -213,17 +235,17 @@ fnmatch1(pattern, string, flags, patmbs, strmbs) } static int -rangematch(pattern, test, flags, newp, patmbs) - const char *pattern; - wchar_t test; - int flags; - char **newp; - mbstate_t *patmbs; +rangematch(const char *pattern, wchar_t test, int flags, char **newp, + mbstate_t *patmbs) { int negate, ok; wchar_t c, c2; size_t pclen; const char *origpat; +#ifndef __CYGWIN__ + struct xlocale_collate *table = + (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; +#endif /* * A bracket expression starting with an unquoted circumflex @@ -278,11 +300,19 @@ rangematch(pattern, test, flags, newp, patmbs) if (flags & FNM_CASEFOLD) c2 = towlower(c2); +#ifdef __CYGWIN__ if (__collate_load_error ? c <= test && test <= c2 : __collate_range_cmp(c, test) <= 0 && __collate_range_cmp(test, c2) <= 0 ) +#else + if (table->__collate_load_error ? + c <= test && test <= c2 : + __collate_range_cmp(table, c, test) <= 0 + && __collate_range_cmp(table, test, c2) <= 0 + ) +#endif ok = 1; } else if (c == test) ok = 1;