xref: /haiku/src/system/libroot/posix/glob.c (revision e81a954787e50e56a7f06f72705b7859b6ab06d1)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #if defined(LIBC_SCCS) && !defined(lint)
34 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
35 #endif /* LIBC_SCCS and not lint */
36 #include <sys/cdefs.h>
37 //__FBSDID("$FreeBSD: src/lib/libc/gen/glob.c,v 1.27 2008/06/26 07:12:35 mtm Exp $");
38 
39 /*
40  * glob(3) -- a superset of the one defined in POSIX 1003.2.
41  *
42  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
43  *
44  * Optional extra services, controlled by flags not defined by POSIX:
45  *
46  * GLOB_QUOTE:
47  *	Escaping convention: \ inhibits any special meaning the following
48  *	character might have (except \ at end of string is retained).
49  * GLOB_MAGCHAR:
50  *	Set in gl_flags if pattern contained a globbing character.
51  * GLOB_NOMAGIC:
52  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
53  *	not contain any magic characters.  [Used in csh style globbing]
54  * GLOB_ALTDIRFUNC:
55  *	Use alternately specified directory access functions.
56  * GLOB_TILDE:
57  *	expand ~user/foo to the /home/dir/of/user/foo
58  * GLOB_BRACE:
59  *	expand {1,2}{a,b} to 1a 1b 2a 2b
60  * gl_matchc:
61  *	Number of matches in the current invocation of glob.
62  */
63 
64 /*
65  * Some notes on multibyte character support:
66  * 1. Patterns with illegal byte sequences match nothing - even if
67  *    GLOB_NOCHECK is specified.
68  * 2. Illegal byte sequences in filenames are handled by treating them as
69  *    single-byte characters with a value of the first byte of the sequence
70  *    cast to wchar_t.
71  * 3. State-dependent encodings are not currently supported.
72  */
73 
74 #include <sys/param.h>
75 #include <sys/stat.h>
76 
77 #include <ctype.h>
78 #include <dirent.h>
79 #include <errno.h>
80 #include <glob.h>
81 #include <limits.h>
82 #include <pwd.h>
83 #include <stdint.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <unistd.h>
88 
89 #include <errno_private.h>
90 #include <wchar_private.h>
91 
92 #ifndef __HAIKU__
93 #	include "collate.h"
94 #endif
95 
96 #define	DOLLAR		'$'
97 #define	DOT		'.'
98 #define	EOS		'\0'
99 #define	LBRACKET	'['
100 #define	NOT		'!'
101 #define	QUESTION	'?'
102 #define	QUOTE		'\\'
103 #define	RANGE		'-'
104 #define	RBRACKET	']'
105 #define	SEP		'/'
106 #define	STAR		'*'
107 #define	TILDE		'~'
108 #define	UNDERSCORE	'_'
109 #define	LBRACE		'{'
110 #define	RBRACE		'}'
111 #define	SLASH		'/'
112 #define	COMMA		','
113 
114 #ifndef DEBUG
115 
116 #define	M_QUOTE		0x8000000000ULL
117 #define	M_PROTECT	0x4000000000ULL
118 #define	M_MASK		0xffffffffffULL
119 #define	M_CHAR		0x00ffffffffULL
120 
121 typedef uint_fast64_t Char;
122 
123 #else
124 
125 #define	M_QUOTE		0x80
126 #define	M_PROTECT	0x40
127 #define	M_MASK		0xff
128 #define	M_CHAR		0x7f
129 
130 typedef char Char;
131 
132 #endif
133 
134 
135 #define	CHAR(c)		((Char)((c)&M_CHAR))
136 #define	META(c)		((Char)((c)|M_QUOTE))
137 #define	M_ALL		META('*')
138 #define	M_END		META(']')
139 #define	M_NOT		META('!')
140 #define	M_ONE		META('?')
141 #define	M_RNG		META('-')
142 #define	M_SET		META('[')
143 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
144 
145 
146 static int	 compare(const void *, const void *);
147 static int	 g_Ctoc(const Char *, char *, size_t);
148 static int	 g_lstat(Char *, struct stat *, glob_t *);
149 static DIR	*g_opendir(Char *, glob_t *);
150 static const Char *g_strchr(const Char *, wchar_t);
151 #ifdef notdef
152 static Char	*g_strcat(Char *, const Char *);
153 #endif
154 static int	 g_stat(Char *, struct stat *, glob_t *);
155 static int	 glob0(const Char *, glob_t *, size_t *);
156 static int	 glob1(Char *, glob_t *, size_t *);
157 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
158 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
159 static int	 globextend(const Char *, glob_t *, size_t *);
160 static const Char *
161 		 globtilde(const Char *, Char *, size_t, glob_t *);
162 static int	 globexp1(const Char *, glob_t *, size_t *);
163 static int	 globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
164 static int	 match(Char *, Char *, Char *);
165 #ifdef DEBUG
166 static void	 qprintf(const char *, Char *);
167 #endif
168 
169 
170 
171 int
172 glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
173 {
174 	const char *patnext;
175 	size_t limit;
176 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
177 	mbstate_t mbs;
178 	wchar_t wc;
179 	size_t clen;
180 
181 	patnext = pattern;
182 	if (!(flags & GLOB_APPEND)) {
183 		pglob->gl_pathc = 0;
184 		pglob->gl_pathv = NULL;
185 		if (!(flags & GLOB_DOOFFS))
186 			pglob->gl_offs = 0;
187 	}
188 	if (flags & GLOB_LIMIT) {
189 		limit = pglob->gl_matchc;
190 		if (limit == 0)
191 			limit = ARG_MAX;
192 	} else
193 		limit = 0;
194 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
195 	pglob->gl_errfunc = errfunc;
196 	pglob->gl_matchc = 0;
197 
198 	bufnext = patbuf;
199 	bufend = bufnext + MAXPATHLEN - 1;
200 	if (flags & GLOB_NOESCAPE) {
201 		memset(&mbs, 0, sizeof(mbs));
202 		while (bufend - bufnext >= MB_CUR_MAX) {
203 			clen = __mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
204 			if (clen == (size_t)-1 || clen == (size_t)-2)
205 				return (GLOB_NOMATCH);
206 			else if (clen == 0)
207 				break;
208 			*bufnext++ = wc;
209 			patnext += clen;
210 		}
211 	} else {
212 		/* Protect the quoted characters. */
213 		memset(&mbs, 0, sizeof(mbs));
214 		while (bufend - bufnext >= MB_CUR_MAX) {
215 			if (*patnext == QUOTE) {
216 				if (*++patnext == EOS) {
217 					*bufnext++ = QUOTE | M_PROTECT;
218 					continue;
219 				}
220 				prot = M_PROTECT;
221 			} else
222 				prot = 0;
223 			clen = __mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
224 			if (clen == (size_t)-1 || clen == (size_t)-2)
225 				return (GLOB_NOMATCH);
226 			else if (clen == 0)
227 				break;
228 			*bufnext++ = wc | prot;
229 			patnext += clen;
230 		}
231 	}
232 	*bufnext = EOS;
233 
234 	if (flags & GLOB_BRACE)
235 	    return globexp1(patbuf, pglob, &limit);
236 	else
237 	    return glob0(patbuf, pglob, &limit);
238 }
239 
240 /*
241  * Expand recursively a glob {} pattern. When there is no more expansion
242  * invoke the standard globbing routine to glob the rest of the magic
243  * characters
244  */
245 static int
246 globexp1(const Char *pattern, glob_t *pglob, size_t *limit)
247 {
248 	const Char* ptr = pattern;
249 	int rv;
250 
251 	/* Protect a single {}, for find(1), like csh */
252 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
253 		return glob0(pattern, pglob, limit);
254 
255 	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
256 		if (!globexp2(ptr, pattern, pglob, &rv, limit))
257 			return rv;
258 
259 	return glob0(pattern, pglob, limit);
260 }
261 
262 
263 /*
264  * Recursive brace globbing helper. Tries to expand a single brace.
265  * If it succeeds then it invokes globexp1 with the new pattern.
266  * If it fails then it tries to glob the rest of the pattern and returns.
267  */
268 static int
269 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
270 {
271 	int     i;
272 	Char   *lm, *ls;
273 	const Char *pe, *pm, *pm1, *pl;
274 	Char    patbuf[MAXPATHLEN];
275 
276 	/* copy part up to the brace */
277 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
278 		continue;
279 	*lm = EOS;
280 	ls = lm;
281 
282 	/* Find the balanced brace */
283 	for (i = 0, pe = ++ptr; *pe; pe++)
284 		if (*pe == LBRACKET) {
285 			/* Ignore everything between [] */
286 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
287 				continue;
288 			if (*pe == EOS) {
289 				/*
290 				 * We could not find a matching RBRACKET.
291 				 * Ignore and just look for RBRACE
292 				 */
293 				pe = pm;
294 			}
295 		}
296 		else if (*pe == LBRACE)
297 			i++;
298 		else if (*pe == RBRACE) {
299 			if (i == 0)
300 				break;
301 			i--;
302 		}
303 
304 	/* Non matching braces; just glob the pattern */
305 	if (i != 0 || *pe == EOS) {
306 		*rv = glob0(patbuf, pglob, limit);
307 		return 0;
308 	}
309 
310 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
311 		switch (*pm) {
312 		case LBRACKET:
313 			/* Ignore everything between [] */
314 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
315 				continue;
316 			if (*pm == EOS) {
317 				/*
318 				 * We could not find a matching RBRACKET.
319 				 * Ignore and just look for RBRACE
320 				 */
321 				pm = pm1;
322 			}
323 			break;
324 
325 		case LBRACE:
326 			i++;
327 			break;
328 
329 		case RBRACE:
330 			if (i) {
331 			    i--;
332 			    break;
333 			}
334 			/* FALLTHROUGH */
335 		case COMMA:
336 			if (i && *pm == COMMA)
337 				break;
338 			else {
339 				/* Append the current string */
340 				for (lm = ls; (pl < pm); *lm++ = *pl++)
341 					continue;
342 				/*
343 				 * Append the rest of the pattern after the
344 				 * closing brace
345 				 */
346 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
347 					continue;
348 
349 				/* Expand the current pattern */
350 #ifdef DEBUG
351 				qprintf("globexp2:", patbuf);
352 #endif
353 				*rv = globexp1(patbuf, pglob, limit);
354 
355 				/* move after the comma, to the next string */
356 				pl = pm + 1;
357 			}
358 			break;
359 
360 		default:
361 			break;
362 		}
363 	*rv = 0;
364 	return 0;
365 }
366 
367 
368 
369 /*
370  * expand tilde from the passwd file.
371  */
372 static const Char *
373 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
374 {
375 	struct passwd *pwd;
376 	char *h;
377 	const Char *p;
378 	Char *b, *eb;
379 
380 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
381 		return pattern;
382 
383 	/*
384 	 * Copy up to the end of the string or /
385 	 */
386 	eb = &patbuf[patbuf_len - 1];
387 	for (p = pattern + 1, h = (char *) patbuf;
388 	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
389 		continue;
390 
391 	*h = EOS;
392 
393 	if (((char *) patbuf)[0] == EOS) {
394 		/*
395 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
396 		 * we're not running setuid or setgid) and then trying
397 		 * the password file
398 		 */
399 		if (
400 #ifndef __HAIKU__
401 		    issetugid() != 0 ||
402 #endif
403 		    (h = getenv("HOME")) == NULL) {
404 			if (((h = getlogin()) != NULL &&
405 			     (pwd = getpwnam(h)) != NULL) ||
406 			    (pwd = getpwuid(getuid())) != NULL)
407 				h = pwd->pw_dir;
408 			else
409 				return pattern;
410 		}
411 	}
412 	else {
413 		/*
414 		 * Expand a ~user
415 		 */
416 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
417 			return pattern;
418 		else
419 			h = pwd->pw_dir;
420 	}
421 
422 	/* Copy the home directory */
423 	for (b = patbuf; b < eb && *h; *b++ = *h++)
424 		continue;
425 
426 	/* Append the rest of the pattern */
427 	while (b < eb && (*b++ = *p++) != EOS)
428 		continue;
429 	*b = EOS;
430 
431 	return patbuf;
432 }
433 
434 
435 /*
436  * The main glob() routine: compiles the pattern (optionally processing
437  * quotes), calls glob1() to do the real pattern matching, and finally
438  * sorts the list (unless unsorted operation is requested).  Returns 0
439  * if things went well, nonzero if errors occurred.
440  */
441 static int
442 glob0(const Char *pattern, glob_t *pglob, size_t *limit)
443 {
444 	const Char *qpatnext;
445 	int c, err;
446 	size_t oldpathc;
447 	Char *bufnext, patbuf[MAXPATHLEN];
448 
449 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
450 	oldpathc = pglob->gl_pathc;
451 	bufnext = patbuf;
452 
453 	/* We don't need to check for buffer overflow any more. */
454 	while ((c = *qpatnext++) != EOS) {
455 		switch (c) {
456 		case LBRACKET:
457 			c = *qpatnext;
458 			if (c == NOT)
459 				++qpatnext;
460 			if (*qpatnext == EOS ||
461 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
462 				*bufnext++ = LBRACKET;
463 				if (c == NOT)
464 					--qpatnext;
465 				break;
466 			}
467 			*bufnext++ = M_SET;
468 			if (c == NOT)
469 				*bufnext++ = M_NOT;
470 			c = *qpatnext++;
471 			do {
472 				*bufnext++ = CHAR(c);
473 				if (*qpatnext == RANGE &&
474 				    (c = qpatnext[1]) != RBRACKET) {
475 					*bufnext++ = M_RNG;
476 					*bufnext++ = CHAR(c);
477 					qpatnext += 2;
478 				}
479 			} while ((c = *qpatnext++) != RBRACKET);
480 			pglob->gl_flags |= GLOB_MAGCHAR;
481 			*bufnext++ = M_END;
482 			break;
483 		case QUESTION:
484 			pglob->gl_flags |= GLOB_MAGCHAR;
485 			*bufnext++ = M_ONE;
486 			break;
487 		case STAR:
488 			pglob->gl_flags |= GLOB_MAGCHAR;
489 			/* collapse adjacent stars to one,
490 			 * to avoid exponential behavior
491 			 */
492 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
493 			    *bufnext++ = M_ALL;
494 			break;
495 		default:
496 			*bufnext++ = CHAR(c);
497 			break;
498 		}
499 	}
500 	*bufnext = EOS;
501 #ifdef DEBUG
502 	qprintf("glob0:", patbuf);
503 #endif
504 
505 	if ((err = glob1(patbuf, pglob, limit)) != 0)
506 		return(err);
507 
508 	/*
509 	 * If there was no match we are going to append the pattern
510 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
511 	 * and the pattern did not contain any magic characters
512 	 * GLOB_NOMAGIC is there just for compatibility with csh.
513 	 */
514 	if (pglob->gl_pathc == oldpathc) {
515 		if (((pglob->gl_flags & GLOB_NOCHECK) ||
516 		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
517 			!(pglob->gl_flags & GLOB_MAGCHAR))))
518 			return(globextend(pattern, pglob, limit));
519 		else
520 			return(GLOB_NOMATCH);
521 	}
522 	if (!(pglob->gl_flags & GLOB_NOSORT))
523 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
524 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
525 	return(0);
526 }
527 
528 static int
529 compare(const void *p, const void *q)
530 {
531 	return(strcmp(*(char **)p, *(char **)q));
532 }
533 
534 static int
535 glob1(Char *pattern, glob_t *pglob, size_t *limit)
536 {
537 	Char pathbuf[MAXPATHLEN];
538 
539 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
540 	if (*pattern == EOS)
541 		return(0);
542 	return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
543 	    pattern, pglob, limit));
544 }
545 
546 /*
547  * The functions glob2 and glob3 are mutually recursive; there is one level
548  * of recursion for each segment in the pattern that contains one or more
549  * meta characters.
550  */
551 static int
552 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
553       glob_t *pglob, size_t *limit)
554 {
555 	struct stat sb;
556 	Char *p, *q;
557 	int anymeta;
558 
559 	/*
560 	 * Loop over pattern segments until end of pattern or until
561 	 * segment with meta character found.
562 	 */
563 	for (anymeta = 0;;) {
564 		if (*pattern == EOS) {		/* End of pattern? */
565 			*pathend = EOS;
566 			if (g_lstat(pathbuf, &sb, pglob))
567 				return(0);
568 
569 			if (((pglob->gl_flags & GLOB_MARK) &&
570 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
571 			    || (S_ISLNK(sb.st_mode) &&
572 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
573 			    S_ISDIR(sb.st_mode)))) {
574 				if (pathend + 1 > pathend_last)
575 					return (GLOB_ABORTED);
576 				*pathend++ = SEP;
577 				*pathend = EOS;
578 			}
579 			++pglob->gl_matchc;
580 			return(globextend(pathbuf, pglob, limit));
581 		}
582 
583 		/* Find end of next segment, copy tentatively to pathend. */
584 		q = pathend;
585 		p = pattern;
586 		while (*p != EOS && *p != SEP) {
587 			if (ismeta(*p))
588 				anymeta = 1;
589 			if (q + 1 > pathend_last)
590 				return (GLOB_ABORTED);
591 			*q++ = *p++;
592 		}
593 
594 		if (!anymeta) {		/* No expansion, do next segment. */
595 			pathend = q;
596 			pattern = p;
597 			while (*pattern == SEP) {
598 				if (pathend + 1 > pathend_last)
599 					return (GLOB_ABORTED);
600 				*pathend++ = *pattern++;
601 			}
602 		} else			/* Need expansion, recurse. */
603 			return(glob3(pathbuf, pathend, pathend_last, pattern, p,
604 			    pglob, limit));
605 	}
606 	/* NOTREACHED */
607 }
608 
609 static int
610 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
611       Char *pattern, Char *restpattern,
612       glob_t *pglob, size_t *limit)
613 {
614 	struct dirent *dp;
615 	DIR *dirp;
616 	int err;
617 	char buf[MAXPATHLEN];
618 
619 	/*
620 	 * The readdirfunc declaration can't be prototyped, because it is
621 	 * assigned, below, to two functions which are prototyped in glob.h
622 	 * and dirent.h as taking pointers to differently typed opaque
623 	 * structures.
624 	 */
625 	struct dirent *(*readdirfunc)();
626 
627 	if (pathend > pathend_last)
628 		return (GLOB_ABORTED);
629 	*pathend = EOS;
630 	__set_errno(0);
631 
632 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
633 		/* TODO: don't call for ENOENT or ENOTDIR? */
634 		if (pglob->gl_errfunc) {
635 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
636 				return (GLOB_ABORTED);
637 			if (pglob->gl_errfunc(buf, errno) ||
638 			    pglob->gl_flags & GLOB_ERR)
639 				return (GLOB_ABORTED);
640 		}
641 		return(0);
642 	}
643 
644 	err = 0;
645 
646 	/* Search directory for matching names. */
647 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
648 		readdirfunc = pglob->gl_readdir;
649 	else
650 		readdirfunc = readdir;
651 	while ((dp = (*readdirfunc)(dirp))) {
652 		char *sc;
653 		Char *dc;
654 		wchar_t wc;
655 		size_t clen;
656 		mbstate_t mbs;
657 
658 		/* Initial DOT must be matched literally. */
659 		if (dp->d_name[0] == DOT && *pattern != DOT)
660 			continue;
661 		memset(&mbs, 0, sizeof(mbs));
662 		dc = pathend;
663 		sc = dp->d_name;
664 		while (dc < pathend_last) {
665 			clen = __mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
666 			if (clen == (size_t)-1 || clen == (size_t)-2) {
667 				wc = *sc;
668 				clen = 1;
669 				memset(&mbs, 0, sizeof(mbs));
670 			}
671 			if ((*dc++ = wc) == EOS)
672 				break;
673 			sc += clen;
674 		}
675 		if (!match(pathend, pattern, restpattern)) {
676 			*pathend = EOS;
677 			continue;
678 		}
679 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
680 		    pglob, limit);
681 		if (err)
682 			break;
683 	}
684 
685 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
686 		(*pglob->gl_closedir)(dirp);
687 	else
688 		closedir(dirp);
689 	return(err);
690 }
691 
692 
693 /*
694  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
695  * add the new item, and update gl_pathc.
696  *
697  * This assumes the BSD realloc, which only copies the block when its size
698  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
699  * behavior.
700  *
701  * Return 0 if new item added, error code if memory couldn't be allocated.
702  *
703  * Invariant of the glob_t structure:
704  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
705  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
706  */
707 static int
708 globextend(const Char *path, glob_t *pglob, size_t *limit)
709 {
710 	char **pathv;
711 	size_t i, newsize, len;
712 	char *copy;
713 	const Char *p;
714 
715 	if (*limit && pglob->gl_pathc > *limit) {
716 		__set_errno(0);
717 		return (GLOB_NOSPACE);
718 	}
719 
720 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
721 	pathv = pglob->gl_pathv ?
722 		    realloc((char *)pglob->gl_pathv, newsize) :
723 		    malloc(newsize);
724 	if (pathv == NULL) {
725 		if (pglob->gl_pathv) {
726 			free(pglob->gl_pathv);
727 			pglob->gl_pathv = NULL;
728 		}
729 		return(GLOB_NOSPACE);
730 	}
731 
732 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
733 		/* first time around -- clear initial gl_offs items */
734 		pathv += pglob->gl_offs;
735 		for (i = pglob->gl_offs + 1; --i > 0; )
736 			*--pathv = NULL;
737 	}
738 	pglob->gl_pathv = pathv;
739 
740 	for (p = path; *p++;)
741 		continue;
742 	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
743 	if ((copy = malloc(len)) != NULL) {
744 		if (g_Ctoc(path, copy, len)) {
745 			free(copy);
746 			return (GLOB_NOSPACE);
747 		}
748 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
749 	}
750 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
751 	return(copy == NULL ? GLOB_NOSPACE : 0);
752 }
753 
754 /*
755  * pattern matching function for filenames.  Each occurrence of the *
756  * pattern causes a recursion level.
757  */
758 static int
759 match(Char *name, Char *pat, Char *patend)
760 {
761 	int ok, negate_range;
762 	Char c, k;
763 
764 	while (pat < patend) {
765 		c = *pat++;
766 		switch (c & M_MASK) {
767 		case M_ALL:
768 			if (pat == patend)
769 				return(1);
770 			do
771 			    if (match(name, pat, patend))
772 				    return(1);
773 			while (*name++ != EOS);
774 			return(0);
775 		case M_ONE:
776 			if (*name++ == EOS)
777 				return(0);
778 			break;
779 		case M_SET:
780 			ok = 0;
781 			if ((k = *name++) == EOS)
782 				return(0);
783 			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
784 				++pat;
785 			while (((c = *pat++) & M_MASK) != M_END)
786 				if ((*pat & M_MASK) == M_RNG) {
787 					if (
788 #ifndef __HAIKU__
789 					    __collate_load_error ?
790 #endif
791 					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])
792 #ifndef __HAIKU__
793 					    :
794 					       __collate_range_cmp(CHAR(c), CHAR(k)) <= 0
795 					    && __collate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0
796 #endif
797 					   )
798 						ok = 1;
799 					pat += 2;
800 				} else if (c == k)
801 					ok = 1;
802 			if (ok == negate_range)
803 				return(0);
804 			break;
805 		default:
806 			if (*name++ != c)
807 				return(0);
808 			break;
809 		}
810 	}
811 	return(*name == EOS);
812 }
813 
814 /* Free allocated data belonging to a glob_t structure. */
815 void
816 globfree(glob_t *pglob)
817 {
818 	size_t i;
819 	char **pp;
820 
821 	if (pglob->gl_pathv != NULL) {
822 		pp = pglob->gl_pathv + pglob->gl_offs;
823 		for (i = pglob->gl_pathc; i--; ++pp)
824 			if (*pp)
825 				free(*pp);
826 		free(pglob->gl_pathv);
827 		pglob->gl_pathv = NULL;
828 	}
829 }
830 
831 static DIR *
832 g_opendir(Char *str, glob_t *pglob)
833 {
834 	char buf[MAXPATHLEN];
835 
836 	if (!*str)
837 		strcpy(buf, ".");
838 	else {
839 		if (g_Ctoc(str, buf, sizeof(buf)))
840 			return (NULL);
841 	}
842 
843 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
844 		return((*pglob->gl_opendir)(buf));
845 
846 	return(opendir(buf));
847 }
848 
849 static int
850 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
851 {
852 	char buf[MAXPATHLEN];
853 
854 	if (g_Ctoc(fn, buf, sizeof(buf))) {
855 		__set_errno(ENAMETOOLONG);
856 		return (-1);
857 	}
858 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
859 		return((*pglob->gl_lstat)(buf, sb));
860 	return(lstat(buf, sb));
861 }
862 
863 static int
864 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
865 {
866 	char buf[MAXPATHLEN];
867 
868 	if (g_Ctoc(fn, buf, sizeof(buf))) {
869 		__set_errno(ENAMETOOLONG);
870 		return (-1);
871 	}
872 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
873 		return((*pglob->gl_stat)(buf, sb));
874 	return(stat(buf, sb));
875 }
876 
877 static const Char *
878 g_strchr(const Char *str, wchar_t ch)
879 {
880 
881 	do {
882 		if (*str == ch)
883 			return (str);
884 	} while (*str++);
885 	return (NULL);
886 }
887 
888 static int
889 g_Ctoc(const Char *str, char *buf, size_t len)
890 {
891 	mbstate_t mbs;
892 	size_t clen;
893 
894 	memset(&mbs, 0, sizeof(mbs));
895 	while (len >= MB_CUR_MAX) {
896 		clen = __wcrtomb(buf, *str, &mbs);
897 		if (clen == (size_t)-1)
898 			return (1);
899 		if (*str == L'\0')
900 			return (0);
901 		str++;
902 		buf += clen;
903 		len -= clen;
904 	}
905 	return (1);
906 }
907 
908 #ifdef DEBUG
909 static void
910 qprintf(const char *str, Char *s)
911 {
912 	Char *p;
913 
914 	(void)printf("%s:\n", str);
915 	for (p = s; *p; p++)
916 		(void)printf("%c", CHAR(*p));
917 	(void)printf("\n");
918 	for (p = s; *p; p++)
919 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
920 	(void)printf("\n");
921 	for (p = s; *p; p++)
922 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
923 	(void)printf("\n");
924 }
925 #endif
926