/*
  Copyright (c) 1990-2002 Info-ZIP. All rights reserved.

  See the accompanying file LICENSE, version 2000-Apr-09 or later
  (the contents of which are also included in unzip.h) for terms of use.
  If, for some reason, all these files are missing, the Info-ZIP license
  also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/*---------------------------------------------------------------------------

  match.c

  The match() routine recursively compares a string to a "pattern" (regular
  expression), returning TRUE if a match is found or FALSE if not. This
  version is specifically for use with unzip.c: as did the previous match()
  routines from SEA and J. Kercheval, it leaves the case (upper, lower, or
  mixed) of the string alone, but converts any uppercase characters in the
  pattern to lowercase if indicated by the global var pInfo->lcflag (which
  is to say, string is assumed to have been converted to lowercase already,
  if such was necessary).

  GRR: reversed order of text, pattern in matche() (now same as match());
       added ignore_case/ic flags, Case() macro.

  PaulK: replaced matche() with recmatch() from Zip, modified to have an
         ignore_case argument; replaced test frame with simpler one.

  ---------------------------------------------------------------------------

  Copyright on recmatch() from Zip's util.c (although recmatch() was almost
  certainly written by Mark Adler...ask me how I can tell :-) ):

     Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
     Kai Uwe Rommel and Igor Mandrichenko.

     Permission is granted to any individual or institution to use, copy,
     or redistribute this software so long as all of the original files are
     included unmodified, that it is not sold for profit, and that this copy-
     right notice is retained.

  ---------------------------------------------------------------------------

  Match the pattern (wildcard) against the string (fixed):

     match(string, pattern, ignore_case);

  returns TRUE if string matches pattern, FALSE otherwise. In the pattern:

     `*'                 matches any sequence of characters (zero or more)
     `?'                 matches any single character
     [SET]               matches any character in the specified set,
     [!SET] or [^SET]    matches any character not in the specified set.

  A set is composed of characters or ranges; a range looks like ``character
  hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
  characters allowed in the [..] pattern construct. Other characters are
  allowed (i.e., 8-bit characters) if your system will support them.

  To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
  side or outside a [..] construct, and match the character exactly, precede
  it with a ``\'' (backslash).

  Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is
  defined. See the DOSWILD section below for an explanation. Note also
  that with VMSWILD defined, '%' is used instead of '?', and sets (ranges)
  are delimited by () instead of [].

  ---------------------------------------------------------------------------*/
69*17049c45SAxel Dörfler
70*17049c45SAxel Dörfler
71*17049c45SAxel Dörfler #define __MATCH_C /* identifies this source module */
72*17049c45SAxel Dörfler
73*17049c45SAxel Dörfler /* define ToLower() in here (for Unix, define ToLower to be macro (using
74*17049c45SAxel Dörfler * isupper()); otherwise just use tolower() */
75*17049c45SAxel Dörfler #define UNZIP_INTERNAL
76*17049c45SAxel Dörfler #include "unzip.h"
77*17049c45SAxel Dörfler
78*17049c45SAxel Dörfler #ifndef THEOS /* the Theos port defines its own variant of match() */
79*17049c45SAxel Dörfler
80*17049c45SAxel Dörfler #if 0 /* this is not useful until it matches Amiga names insensitively */
81*17049c45SAxel Dörfler #ifdef AMIGA /* some other platforms might also want to use this */
82*17049c45SAxel Dörfler # define ANSI_CHARSET /* MOVE INTO UNZIP.H EVENTUALLY */
83*17049c45SAxel Dörfler #endif
84*17049c45SAxel Dörfler #endif /* 0 */
85*17049c45SAxel Dörfler
86*17049c45SAxel Dörfler #ifdef ANSI_CHARSET
87*17049c45SAxel Dörfler # ifdef ToLower
88*17049c45SAxel Dörfler # undef ToLower
89*17049c45SAxel Dörfler # endif
90*17049c45SAxel Dörfler /* uppercase letters are values 41 thru 5A, C0 thru D6, and D8 thru DE */
91*17049c45SAxel Dörfler # define IsUpper(c) (c>=0xC0 ? c<=0xDE && c!=0xD7 : c>=0x41 && c<=0x5A)
92*17049c45SAxel Dörfler # define ToLower(c) (IsUpper((uch) c) ? (unsigned) c | 0x20 : (unsigned) c)
93*17049c45SAxel Dörfler #endif
94*17049c45SAxel Dörfler #define Case(x) (ic? ToLower(x) : (x))
95*17049c45SAxel Dörfler
96*17049c45SAxel Dörfler #ifdef VMSWILD
97*17049c45SAxel Dörfler # define WILDCHAR '%'
98*17049c45SAxel Dörfler # define BEG_RANGE '('
99*17049c45SAxel Dörfler # define END_RANGE ')'
100*17049c45SAxel Dörfler #else
101*17049c45SAxel Dörfler # define WILDCHAR '?'
102*17049c45SAxel Dörfler # define BEG_RANGE '['
103*17049c45SAxel Dörfler # define END_RANGE ']'
104*17049c45SAxel Dörfler #endif
105*17049c45SAxel Dörfler
106*17049c45SAxel Dörfler #if 0 /* GRR: add this to unzip.h someday... */
107*17049c45SAxel Dörfler #if !(defined(MSDOS) && defined(DOSWILD))
108*17049c45SAxel Dörfler #define match(s,p,ic) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic) == 1)
109*17049c45SAxel Dörfler int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, int ignore_case));
110*17049c45SAxel Dörfler #endif
111*17049c45SAxel Dörfler #endif /* 0 */
112*17049c45SAxel Dörfler static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
113*17049c45SAxel Dörfler int ignore_case));
114*17049c45SAxel Dörfler
115*17049c45SAxel Dörfler
116*17049c45SAxel Dörfler
117*17049c45SAxel Dörfler /* match() is a shell to recmatch() to return only Boolean values. */
118*17049c45SAxel Dörfler
match(string,pattern,ignore_case)119*17049c45SAxel Dörfler int match(string, pattern, ignore_case)
120*17049c45SAxel Dörfler ZCONST char *string, *pattern;
121*17049c45SAxel Dörfler int ignore_case;
122*17049c45SAxel Dörfler {
123*17049c45SAxel Dörfler #if (defined(MSDOS) && defined(DOSWILD))
124*17049c45SAxel Dörfler char *dospattern;
125*17049c45SAxel Dörfler int j = strlen(pattern);
126*17049c45SAxel Dörfler
127*17049c45SAxel Dörfler /*---------------------------------------------------------------------------
128*17049c45SAxel Dörfler Optional MS-DOS preprocessing section: compare last three chars of the
129*17049c45SAxel Dörfler wildcard to "*.*" and translate to "*" if found; else compare the last
130*17049c45SAxel Dörfler two characters to "*." and, if found, scan the non-wild string for dots.
131*17049c45SAxel Dörfler If in the latter case a dot is found, return failure; else translate the
132*17049c45SAxel Dörfler "*." to "*". In either case, continue with the normal (Unix-like) match
133*17049c45SAxel Dörfler procedure after translation. (If not enough memory, default to normal
134*17049c45SAxel Dörfler match.) This causes "a*.*" and "a*." to behave as MS-DOS users expect.
135*17049c45SAxel Dörfler ---------------------------------------------------------------------------*/
136*17049c45SAxel Dörfler
137*17049c45SAxel Dörfler if ((dospattern = (char *)malloc(j+1)) != NULL) {
138*17049c45SAxel Dörfler strcpy(dospattern, pattern);
139*17049c45SAxel Dörfler if (!strcmp(dospattern+j-3, "*.*")) {
140*17049c45SAxel Dörfler dospattern[j-2] = '\0'; /* nuke the ".*" */
141*17049c45SAxel Dörfler } else if (!strcmp(dospattern+j-2, "*.")) {
142*17049c45SAxel Dörfler char *p = MBSCHR(string, '.');
143*17049c45SAxel Dörfler
144*17049c45SAxel Dörfler if (p) { /* found a dot: match fails */
145*17049c45SAxel Dörfler free(dospattern);
146*17049c45SAxel Dörfler return 0;
147*17049c45SAxel Dörfler }
148*17049c45SAxel Dörfler dospattern[j-1] = '\0'; /* nuke the end "." */
149*17049c45SAxel Dörfler }
150*17049c45SAxel Dörfler j = recmatch((uch *)dospattern, (uch *)string, ignore_case);
151*17049c45SAxel Dörfler free(dospattern);
152*17049c45SAxel Dörfler return j == 1;
153*17049c45SAxel Dörfler } else
154*17049c45SAxel Dörfler #endif /* MSDOS && DOSWILD */
155*17049c45SAxel Dörfler return recmatch((uch *)pattern, (uch *)string, ignore_case) == 1;
156*17049c45SAxel Dörfler }
157*17049c45SAxel Dörfler
158*17049c45SAxel Dörfler
159*17049c45SAxel Dörfler
recmatch(p,s,ic)160*17049c45SAxel Dörfler static int recmatch(p, s, ic)
161*17049c45SAxel Dörfler ZCONST uch *p; /* sh pattern to match */
162*17049c45SAxel Dörfler ZCONST uch *s; /* string to which to match it */
163*17049c45SAxel Dörfler int ic; /* true for case insensitivity */
164*17049c45SAxel Dörfler /* Recursively compare the sh pattern p with the string s and return 1 if
165*17049c45SAxel Dörfler * they match, and 0 or 2 if they don't or if there is a syntax error in the
166*17049c45SAxel Dörfler * pattern. This routine recurses on itself no more deeply than the number
167*17049c45SAxel Dörfler * of characters in the pattern. */
168*17049c45SAxel Dörfler {
169*17049c45SAxel Dörfler unsigned int c; /* pattern char or start of range in [-] loop */
170*17049c45SAxel Dörfler
171*17049c45SAxel Dörfler /* Get first character, the pattern for new recmatch calls follows */
172*17049c45SAxel Dörfler c = *p; INCSTR(p);
173*17049c45SAxel Dörfler
174*17049c45SAxel Dörfler /* If that was the end of the pattern, match if string empty too */
175*17049c45SAxel Dörfler if (c == 0)
176*17049c45SAxel Dörfler return *s == 0;
177*17049c45SAxel Dörfler
178*17049c45SAxel Dörfler /* '?' (or '%') matches any character (but not an empty string).
179*17049c45SAxel Dörfler * If WILD_STOP_AT_DIR is defined, it won't match '/' */
180*17049c45SAxel Dörfler if (c == WILDCHAR)
181*17049c45SAxel Dörfler #ifdef WILD_STOP_AT_DIR
182*17049c45SAxel Dörfler return (*s && *s != '/') ? recmatch(p, s + CLEN(s), ic) : 0;
183*17049c45SAxel Dörfler #else
184*17049c45SAxel Dörfler return *s ? recmatch(p, s + CLEN(s), ic) : 0;
185*17049c45SAxel Dörfler #endif
186*17049c45SAxel Dörfler
187*17049c45SAxel Dörfler /* '*' matches any number of characters, including zero */
188*17049c45SAxel Dörfler #ifdef AMIGA
189*17049c45SAxel Dörfler if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
190*17049c45SAxel Dörfler c = '*', p++;
191*17049c45SAxel Dörfler #endif /* AMIGA */
192*17049c45SAxel Dörfler if (c == '*') {
193*17049c45SAxel Dörfler #ifdef WILD_STOP_AT_DIR
194*17049c45SAxel Dörfler # ifdef AMIGA
195*17049c45SAxel Dörfler if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
196*17049c45SAxel Dörfler c = '*', p++;
197*17049c45SAxel Dörfler if (c != '*') {
198*17049c45SAxel Dörfler # else /* !AMIGA */
199*17049c45SAxel Dörfler if (*p != '*') {
200*17049c45SAxel Dörfler # endif /* ?AMIGA */
201*17049c45SAxel Dörfler /* single '*': this doesn't match slashes */
202*17049c45SAxel Dörfler for (; *s && *s != '/'; INCSTR(s))
203*17049c45SAxel Dörfler if ((c = recmatch(p, s, ic)) != 0)
204*17049c45SAxel Dörfler return (int)c;
205*17049c45SAxel Dörfler /* end of pattern: matched if at end of string, else continue */
206*17049c45SAxel Dörfler if (*p == 0)
207*17049c45SAxel Dörfler return (*s == 0);
208*17049c45SAxel Dörfler /* continue to match if at '/' in pattern, else give up */
209*17049c45SAxel Dörfler return (*p == '/' || (*p == '\\' && p[1] == '/'))
210*17049c45SAxel Dörfler ? recmatch(p, s, ic) : 2;
211*17049c45SAxel Dörfler }
212*17049c45SAxel Dörfler /* '**': this matches slashes */
213*17049c45SAxel Dörfler ++p; /* move p behind the second '*' */
214*17049c45SAxel Dörfler /* continue with the non-WILD_STOP_AT_DIR code variant */
215*17049c45SAxel Dörfler #endif /* WILD_STOP_AT_DIR */
216*17049c45SAxel Dörfler if (*p == 0)
217*17049c45SAxel Dörfler return 1;
218*17049c45SAxel Dörfler for (; *s; INCSTR(s))
219*17049c45SAxel Dörfler if ((c = recmatch(p, s, ic)) != 0)
220*17049c45SAxel Dörfler return (int)c;
221*17049c45SAxel Dörfler return 2; /* 2 means give up--match will return false */
222*17049c45SAxel Dörfler }
223*17049c45SAxel Dörfler
224*17049c45SAxel Dörfler /* Parse and process the list of characters and ranges in brackets */
225*17049c45SAxel Dörfler if (c == BEG_RANGE) {
226*17049c45SAxel Dörfler int e; /* flag true if next char to be taken literally */
227*17049c45SAxel Dörfler ZCONST uch *q; /* pointer to end of [-] group */
228*17049c45SAxel Dörfler int r; /* flag true to match anything but the range */
229*17049c45SAxel Dörfler
230*17049c45SAxel Dörfler if (*s == 0) /* need a character to match */
231*17049c45SAxel Dörfler return 0;
232*17049c45SAxel Dörfler p += (r = (*p == '!' || *p == '^')); /* see if reverse */
233*17049c45SAxel Dörfler for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
234*17049c45SAxel Dörfler if (e)
235*17049c45SAxel Dörfler e = 0;
236*17049c45SAxel Dörfler else
237*17049c45SAxel Dörfler if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
238*17049c45SAxel Dörfler e = 1;
239*17049c45SAxel Dörfler else if (*q == END_RANGE)
240*17049c45SAxel Dörfler break;
241*17049c45SAxel Dörfler if (*q != END_RANGE) /* nothing matches if bad syntax */
242*17049c45SAxel Dörfler return 0;
243*17049c45SAxel Dörfler for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
244*17049c45SAxel Dörfler /* go through the list */
245*17049c45SAxel Dörfler if (!e && *p == '\\') /* set escape flag if \ */
246*17049c45SAxel Dörfler e = 1;
247*17049c45SAxel Dörfler else if (!e && *p == '-') /* set start of range if - */
248*17049c45SAxel Dörfler c = *(p-1);
249*17049c45SAxel Dörfler else {
250*17049c45SAxel Dörfler unsigned int cc = Case(*s);
251*17049c45SAxel Dörfler
252*17049c45SAxel Dörfler if (*(p+1) != '-')
253*17049c45SAxel Dörfler for (c = c ? c : *p; c <= *p; c++) /* compare range */
254*17049c45SAxel Dörfler if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
255*17049c45SAxel Dörfler return r ? 0 : recmatch(q + 1, s + 1, ic);
256*17049c45SAxel Dörfler c = e = 0; /* clear range, escape flags */
257*17049c45SAxel Dörfler }
258*17049c45SAxel Dörfler }
259*17049c45SAxel Dörfler return r ? recmatch(q + CLEN(q), s + CLEN(s), ic) : 0;
260*17049c45SAxel Dörfler /* bracket match failed */
261*17049c45SAxel Dörfler }
262*17049c45SAxel Dörfler
263*17049c45SAxel Dörfler /* if escape ('\'), just compare next character */
264*17049c45SAxel Dörfler if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
265*17049c45SAxel Dörfler return 0;
266*17049c45SAxel Dörfler
267*17049c45SAxel Dörfler /* just a character--compare it */
268*17049c45SAxel Dörfler #ifdef QDOS
269*17049c45SAxel Dörfler return QMatch(Case((uch)c), Case(*s)) ? recmatch(p, s + CLEN(s), ic) : 0;
270*17049c45SAxel Dörfler #else
271*17049c45SAxel Dörfler return Case((uch)c) == Case(*s) ? recmatch(p, s + CLEN(s), ic) : 0;
272*17049c45SAxel Dörfler #endif
273*17049c45SAxel Dörfler
274*17049c45SAxel Dörfler } /* end function recmatch() */
275*17049c45SAxel Dörfler
276*17049c45SAxel Dörfler #endif /* !THEOS */
277*17049c45SAxel Dörfler
278*17049c45SAxel Dörfler
279*17049c45SAxel Dörfler
280*17049c45SAxel Dörfler
iswild(p)281*17049c45SAxel Dörfler int iswild(p) /* originally only used for stat()-bug workaround in */
282*17049c45SAxel Dörfler ZCONST char *p; /* VAX C, Turbo/Borland C, Watcom C, Atari MiNT libs; */
283*17049c45SAxel Dörfler { /* now used in process_zipfiles() as well */
284*17049c45SAxel Dörfler for (; *p; INCSTR(p))
285*17049c45SAxel Dörfler if (*p == '\\' && *(p+1))
286*17049c45SAxel Dörfler ++p;
287*17049c45SAxel Dörfler #ifdef THEOS
288*17049c45SAxel Dörfler else if (*p == '?' || *p == '*' || *p=='#'|| *p == '@')
289*17049c45SAxel Dörfler #else /* !THEOS */
290*17049c45SAxel Dörfler #ifdef VMS
291*17049c45SAxel Dörfler else if (*p == '%' || *p == '*')
292*17049c45SAxel Dörfler #else /* !VMS */
293*17049c45SAxel Dörfler #ifdef AMIGA
294*17049c45SAxel Dörfler else if (*p == '?' || *p == '*' || (*p=='#' && p[1]=='?') || *p == '[')
295*17049c45SAxel Dörfler #else /* !AMIGA */
296*17049c45SAxel Dörfler else if (*p == '?' || *p == '*' || *p == '[')
297*17049c45SAxel Dörfler #endif /* ?AMIGA */
298*17049c45SAxel Dörfler #endif /* ?VMS */
299*17049c45SAxel Dörfler #endif /* ?THEOS */
300*17049c45SAxel Dörfler #ifdef QDOS
301*17049c45SAxel Dörfler return (int)p;
302*17049c45SAxel Dörfler #else
303*17049c45SAxel Dörfler return TRUE;
304*17049c45SAxel Dörfler #endif
305*17049c45SAxel Dörfler
306*17049c45SAxel Dörfler return FALSE;
307*17049c45SAxel Dörfler
308*17049c45SAxel Dörfler } /* end function iswild() */
309*17049c45SAxel Dörfler
310*17049c45SAxel Dörfler
311*17049c45SAxel Dörfler
312*17049c45SAxel Dörfler
313*17049c45SAxel Dörfler
#ifdef TEST_MATCH

/* Write s to stdout and flush so the prompt appears before input is read. */
#define put(s) do { fputs(s, stdout); fflush(stdout); } while (0)
#ifdef main
#  undef main
#endif

/* Read one line from stdin into buf (at most size-1 characters) and strip
 * the trailing newline.  If the line is longer than the buffer, the excess
 * is discarded so that it is not mistaken for the next answer.
 * Returns 1 on success; on end-of-file or read error returns 0 and leaves
 * buf as an empty string. */
static int read_line(char *buf, int size)
{
    size_t len;

    if (fgets(buf, size, stdin) == NULL) {
        buf[0] = '\0';
        return 0;
    }
    len = strlen(buf);
    if (len > 0 && buf[len - 1] == '\n') {
        buf[len - 1] = '\0';
    } else {
        int ch;

        while ((ch = getc(stdin)) != EOF && ch != '\n')
            ;                       /* drop the rest of an over-long line */
    }
    return 1;
}

/* Interactive test frame: repeatedly asks for a pattern, then for strings to
 * test against it, and prints the case-sensitive and case-insensitive
 * results of match().  An empty line (or end of input) leaves the current
 * loop. */
int main(int argc, char **argv)
{
    char pat[256], str[256];

    (void)argc;
    (void)argv;

    for (;;) {
        put("Pattern (return to exit): ");
        if (!read_line(pat, (int)sizeof(pat)) || !pat[0])
            break;
        for (;;) {
            put("String (return for new pattern): ");
            if (!read_line(str, (int)sizeof(str)) || !str[0])
                break;
            printf("Case sensitive: %s insensitive: %s\n",
              match(str, pat, 0) ? "YES" : "NO",
              match(str, pat, 1) ? "YES" : "NO");
        }
    }
    EXIT(0);
}

#endif /* TEST_MATCH */
344