xref: /haiku/src/system/libroot/posix/glob.c (revision 5074333deda53992b8523d9a96ae9187f8ca2c71)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #if defined(LIBC_SCCS) && !defined(lint)
34 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
35 #endif /* LIBC_SCCS and not lint */
36 #include <sys/cdefs.h>
37 //__FBSDID("$FreeBSD: src/lib/libc/gen/glob.c,v 1.27 2008/06/26 07:12:35 mtm Exp $");
38 
39 /*
40  * glob(3) -- a superset of the one defined in POSIX 1003.2.
41  *
42  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
43  *
44  * Optional extra services, controlled by flags not defined by POSIX:
45  *
46  * GLOB_QUOTE:
47  *	Escaping convention: \ inhibits any special meaning the following
48  *	character might have (except \ at end of string is retained).
49  * GLOB_MAGCHAR:
50  *	Set in gl_flags if pattern contained a globbing character.
51  * GLOB_NOMAGIC:
52  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
53  *	not contain any magic characters.  [Used in csh style globbing]
54  * GLOB_ALTDIRFUNC:
55  *	Use alternately specified directory access functions.
56  * GLOB_TILDE:
57  *	expand ~user/foo to the /home/dir/of/user/foo
58  * GLOB_BRACE:
59  *	expand {1,2}{a,b} to 1a 1b 2a 2b
60  * gl_matchc:
61  *	Number of matches in the current invocation of glob.
62  */
63 
64 /*
65  * Some notes on multibyte character support:
66  * 1. Patterns with illegal byte sequences match nothing - even if
67  *    GLOB_NOCHECK is specified.
68  * 2. Illegal byte sequences in filenames are handled by treating them as
69  *    single-byte characters with a value of the first byte of the sequence
70  *    cast to wchar_t.
71  * 3. State-dependent encodings are not currently supported.
72  */
73 
74 #include <sys/param.h>
75 #include <sys/stat.h>
76 
77 #include <ctype.h>
78 #include <dirent.h>
79 #include <errno.h>
80 #include <glob.h>
81 #include <limits.h>
82 #include <pwd.h>
83 #include <stdint.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <unistd.h>
88 #include <wchar.h>
89 
90 #ifndef __HAIKU__
91 #	include "collate.h"
92 #endif
93 
94 #define	DOLLAR		'$'
95 #define	DOT		'.'
96 #define	EOS		'\0'
97 #define	LBRACKET	'['
98 #define	NOT		'!'
99 #define	QUESTION	'?'
100 #define	QUOTE		'\\'
101 #define	RANGE		'-'
102 #define	RBRACKET	']'
103 #define	SEP		'/'
104 #define	STAR		'*'
105 #define	TILDE		'~'
106 #define	UNDERSCORE	'_'
107 #define	LBRACE		'{'
108 #define	RBRACE		'}'
109 #define	SLASH		'/'
110 #define	COMMA		','
111 
112 #if !defined(DEBUG) && !defined(__HAIKU__)
113 
114 #define	M_QUOTE		0x8000000000ULL
115 #define	M_PROTECT	0x4000000000ULL
116 #define	M_MASK		0xffffffffffULL
117 #define	M_CHAR		0x00ffffffffULL
118 
119 typedef uint_fast64_t Char;
120 
121 #else
122 
123 #define	M_QUOTE		0x80
124 #define	M_PROTECT	0x40
125 #define	M_MASK		0xff
126 #define	M_CHAR		0x7f
127 
128 typedef char Char;
129 
130 #endif
131 
132 
133 #define	CHAR(c)		((Char)((c)&M_CHAR))
134 #define	META(c)		((Char)((c)|M_QUOTE))
135 #define	M_ALL		META('*')
136 #define	M_END		META(']')
137 #define	M_NOT		META('!')
138 #define	M_ONE		META('?')
139 #define	M_RNG		META('-')
140 #define	M_SET		META('[')
141 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
142 
143 
144 static int	 compare(const void *, const void *);
145 static int	 g_Ctoc(const Char *, char *, size_t);
146 static int	 g_lstat(Char *, struct stat *, glob_t *);
147 static DIR	*g_opendir(Char *, glob_t *);
148 static const Char *g_strchr(const Char *, wchar_t);
149 #ifdef notdef
150 static Char	*g_strcat(Char *, const Char *);
151 #endif
152 static int	 g_stat(Char *, struct stat *, glob_t *);
153 static int	 glob0(const Char *, glob_t *, size_t *);
154 static int	 glob1(Char *, glob_t *, size_t *);
155 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *, size_t *);
156 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, size_t *);
157 static int	 globextend(const Char *, glob_t *, size_t *);
158 static const Char *
159 		 globtilde(const Char *, Char *, size_t, glob_t *);
160 static int	 globexp1(const Char *, glob_t *, size_t *);
161 static int	 globexp2(const Char *, const Char *, glob_t *, int *, size_t *);
162 static int	 match(Char *, Char *, Char *);
163 #ifdef DEBUG
164 static void	 qprintf(const char *, Char *);
165 #endif
166 
167 #ifdef __HAIKU__
168 #	warning glob() wants to use wide character functions
169 
170 static size_t
171 my_wcrtomb(char* to, char wchar, mbstate_t* state)
172 {
173 	*to = wchar;
174 	return 1;
175 }
176 
177 static size_t
178 my_mbrtowc(wchar_t* to, const char* string, size_t len, mbstate_t* state)
179 {
180 	if (!string[0])
181 		return 0;
182 	*to = *string;
183 	return 1;
184 }
185 
186 #	define wcrtomb my_wcrtomb
187 #	define mbrtowc my_mbrtowc
188 #endif
189 
190 
191 int
192 glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob)
193 {
194 	const char *patnext;
195 	size_t limit;
196 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
197 	mbstate_t mbs;
198 	wchar_t wc;
199 	size_t clen;
200 
201 	patnext = pattern;
202 	if (!(flags & GLOB_APPEND)) {
203 		pglob->gl_pathc = 0;
204 		pglob->gl_pathv = NULL;
205 		if (!(flags & GLOB_DOOFFS))
206 			pglob->gl_offs = 0;
207 	}
208 	if (flags & GLOB_LIMIT) {
209 		limit = pglob->gl_matchc;
210 		if (limit == 0)
211 			limit = ARG_MAX;
212 	} else
213 		limit = 0;
214 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
215 	pglob->gl_errfunc = errfunc;
216 	pglob->gl_matchc = 0;
217 
218 	bufnext = patbuf;
219 	bufend = bufnext + MAXPATHLEN - 1;
220 	if (flags & GLOB_NOESCAPE) {
221 		memset(&mbs, 0, sizeof(mbs));
222 		while (bufend - bufnext >= MB_CUR_MAX) {
223 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
224 			if (clen == (size_t)-1 || clen == (size_t)-2)
225 				return (GLOB_NOMATCH);
226 			else if (clen == 0)
227 				break;
228 			*bufnext++ = wc;
229 			patnext += clen;
230 		}
231 	} else {
232 		/* Protect the quoted characters. */
233 		memset(&mbs, 0, sizeof(mbs));
234 		while (bufend - bufnext >= MB_CUR_MAX) {
235 			if (*patnext == QUOTE) {
236 				if (*++patnext == EOS) {
237 					*bufnext++ = QUOTE | M_PROTECT;
238 					continue;
239 				}
240 				prot = M_PROTECT;
241 			} else
242 				prot = 0;
243 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
244 			if (clen == (size_t)-1 || clen == (size_t)-2)
245 				return (GLOB_NOMATCH);
246 			else if (clen == 0)
247 				break;
248 			*bufnext++ = wc | prot;
249 			patnext += clen;
250 		}
251 	}
252 	*bufnext = EOS;
253 
254 	if (flags & GLOB_BRACE)
255 	    return globexp1(patbuf, pglob, &limit);
256 	else
257 	    return glob0(patbuf, pglob, &limit);
258 }
259 
260 /*
261  * Expand recursively a glob {} pattern. When there is no more expansion
262  * invoke the standard globbing routine to glob the rest of the magic
263  * characters
264  */
265 static int
266 globexp1(const Char *pattern, glob_t *pglob, size_t *limit)
267 {
268 	const Char* ptr = pattern;
269 	int rv;
270 
271 	/* Protect a single {}, for find(1), like csh */
272 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
273 		return glob0(pattern, pglob, limit);
274 
275 	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
276 		if (!globexp2(ptr, pattern, pglob, &rv, limit))
277 			return rv;
278 
279 	return glob0(pattern, pglob, limit);
280 }
281 
282 
283 /*
284  * Recursive brace globbing helper. Tries to expand a single brace.
285  * If it succeeds then it invokes globexp1 with the new pattern.
286  * If it fails then it tries to glob the rest of the pattern and returns.
287  */
288 static int
289 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv, size_t *limit)
290 {
291 	int     i;
292 	Char   *lm, *ls;
293 	const Char *pe, *pm, *pm1, *pl;
294 	Char    patbuf[MAXPATHLEN];
295 
296 	/* copy part up to the brace */
297 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
298 		continue;
299 	*lm = EOS;
300 	ls = lm;
301 
302 	/* Find the balanced brace */
303 	for (i = 0, pe = ++ptr; *pe; pe++)
304 		if (*pe == LBRACKET) {
305 			/* Ignore everything between [] */
306 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
307 				continue;
308 			if (*pe == EOS) {
309 				/*
310 				 * We could not find a matching RBRACKET.
311 				 * Ignore and just look for RBRACE
312 				 */
313 				pe = pm;
314 			}
315 		}
316 		else if (*pe == LBRACE)
317 			i++;
318 		else if (*pe == RBRACE) {
319 			if (i == 0)
320 				break;
321 			i--;
322 		}
323 
324 	/* Non matching braces; just glob the pattern */
325 	if (i != 0 || *pe == EOS) {
326 		*rv = glob0(patbuf, pglob, limit);
327 		return 0;
328 	}
329 
330 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
331 		switch (*pm) {
332 		case LBRACKET:
333 			/* Ignore everything between [] */
334 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
335 				continue;
336 			if (*pm == EOS) {
337 				/*
338 				 * We could not find a matching RBRACKET.
339 				 * Ignore and just look for RBRACE
340 				 */
341 				pm = pm1;
342 			}
343 			break;
344 
345 		case LBRACE:
346 			i++;
347 			break;
348 
349 		case RBRACE:
350 			if (i) {
351 			    i--;
352 			    break;
353 			}
354 			/* FALLTHROUGH */
355 		case COMMA:
356 			if (i && *pm == COMMA)
357 				break;
358 			else {
359 				/* Append the current string */
360 				for (lm = ls; (pl < pm); *lm++ = *pl++)
361 					continue;
362 				/*
363 				 * Append the rest of the pattern after the
364 				 * closing brace
365 				 */
366 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
367 					continue;
368 
369 				/* Expand the current pattern */
370 #ifdef DEBUG
371 				qprintf("globexp2:", patbuf);
372 #endif
373 				*rv = globexp1(patbuf, pglob, limit);
374 
375 				/* move after the comma, to the next string */
376 				pl = pm + 1;
377 			}
378 			break;
379 
380 		default:
381 			break;
382 		}
383 	*rv = 0;
384 	return 0;
385 }
386 
387 
388 
389 /*
390  * expand tilde from the passwd file.
391  */
392 static const Char *
393 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
394 {
395 	struct passwd *pwd;
396 	char *h;
397 	const Char *p;
398 	Char *b, *eb;
399 
400 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
401 		return pattern;
402 
403 	/*
404 	 * Copy up to the end of the string or /
405 	 */
406 	eb = &patbuf[patbuf_len - 1];
407 	for (p = pattern + 1, h = (char *) patbuf;
408 	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
409 		continue;
410 
411 	*h = EOS;
412 
413 	if (((char *) patbuf)[0] == EOS) {
414 		/*
415 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
416 		 * we're not running setuid or setgid) and then trying
417 		 * the password file
418 		 */
419 		if (
420 #ifndef __HAIKU__
421 		    issetugid() != 0 ||
422 #endif
423 		    (h = getenv("HOME")) == NULL) {
424 			if (((h = getlogin()) != NULL &&
425 			     (pwd = getpwnam(h)) != NULL) ||
426 			    (pwd = getpwuid(getuid())) != NULL)
427 				h = pwd->pw_dir;
428 			else
429 				return pattern;
430 		}
431 	}
432 	else {
433 		/*
434 		 * Expand a ~user
435 		 */
436 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
437 			return pattern;
438 		else
439 			h = pwd->pw_dir;
440 	}
441 
442 	/* Copy the home directory */
443 	for (b = patbuf; b < eb && *h; *b++ = *h++)
444 		continue;
445 
446 	/* Append the rest of the pattern */
447 	while (b < eb && (*b++ = *p++) != EOS)
448 		continue;
449 	*b = EOS;
450 
451 	return patbuf;
452 }
453 
454 
455 /*
456  * The main glob() routine: compiles the pattern (optionally processing
457  * quotes), calls glob1() to do the real pattern matching, and finally
458  * sorts the list (unless unsorted operation is requested).  Returns 0
459  * if things went well, nonzero if errors occurred.
460  */
461 static int
462 glob0(const Char *pattern, glob_t *pglob, size_t *limit)
463 {
464 	const Char *qpatnext;
465 	int c, err;
466 	size_t oldpathc;
467 	Char *bufnext, patbuf[MAXPATHLEN];
468 
469 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
470 	oldpathc = pglob->gl_pathc;
471 	bufnext = patbuf;
472 
473 	/* We don't need to check for buffer overflow any more. */
474 	while ((c = *qpatnext++) != EOS) {
475 		switch (c) {
476 		case LBRACKET:
477 			c = *qpatnext;
478 			if (c == NOT)
479 				++qpatnext;
480 			if (*qpatnext == EOS ||
481 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
482 				*bufnext++ = LBRACKET;
483 				if (c == NOT)
484 					--qpatnext;
485 				break;
486 			}
487 			*bufnext++ = M_SET;
488 			if (c == NOT)
489 				*bufnext++ = M_NOT;
490 			c = *qpatnext++;
491 			do {
492 				*bufnext++ = CHAR(c);
493 				if (*qpatnext == RANGE &&
494 				    (c = qpatnext[1]) != RBRACKET) {
495 					*bufnext++ = M_RNG;
496 					*bufnext++ = CHAR(c);
497 					qpatnext += 2;
498 				}
499 			} while ((c = *qpatnext++) != RBRACKET);
500 			pglob->gl_flags |= GLOB_MAGCHAR;
501 			*bufnext++ = M_END;
502 			break;
503 		case QUESTION:
504 			pglob->gl_flags |= GLOB_MAGCHAR;
505 			*bufnext++ = M_ONE;
506 			break;
507 		case STAR:
508 			pglob->gl_flags |= GLOB_MAGCHAR;
509 			/* collapse adjacent stars to one,
510 			 * to avoid exponential behavior
511 			 */
512 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
513 			    *bufnext++ = M_ALL;
514 			break;
515 		default:
516 			*bufnext++ = CHAR(c);
517 			break;
518 		}
519 	}
520 	*bufnext = EOS;
521 #ifdef DEBUG
522 	qprintf("glob0:", patbuf);
523 #endif
524 
525 	if ((err = glob1(patbuf, pglob, limit)) != 0)
526 		return(err);
527 
528 	/*
529 	 * If there was no match we are going to append the pattern
530 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
531 	 * and the pattern did not contain any magic characters
532 	 * GLOB_NOMAGIC is there just for compatibility with csh.
533 	 */
534 	if (pglob->gl_pathc == oldpathc) {
535 		if (((pglob->gl_flags & GLOB_NOCHECK) ||
536 		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
537 			!(pglob->gl_flags & GLOB_MAGCHAR))))
538 			return(globextend(pattern, pglob, limit));
539 		else
540 			return(GLOB_NOMATCH);
541 	}
542 	if (!(pglob->gl_flags & GLOB_NOSORT))
543 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
544 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
545 	return(0);
546 }
547 
548 static int
549 compare(const void *p, const void *q)
550 {
551 	return(strcmp(*(char **)p, *(char **)q));
552 }
553 
554 static int
555 glob1(Char *pattern, glob_t *pglob, size_t *limit)
556 {
557 	Char pathbuf[MAXPATHLEN];
558 
559 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
560 	if (*pattern == EOS)
561 		return(0);
562 	return(glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
563 	    pattern, pglob, limit));
564 }
565 
566 /*
567  * The functions glob2 and glob3 are mutually recursive; there is one level
568  * of recursion for each segment in the pattern that contains one or more
569  * meta characters.
570  */
571 static int
572 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
573       glob_t *pglob, size_t *limit)
574 {
575 	struct stat sb;
576 	Char *p, *q;
577 	int anymeta;
578 
579 	/*
580 	 * Loop over pattern segments until end of pattern or until
581 	 * segment with meta character found.
582 	 */
583 	for (anymeta = 0;;) {
584 		if (*pattern == EOS) {		/* End of pattern? */
585 			*pathend = EOS;
586 			if (g_lstat(pathbuf, &sb, pglob))
587 				return(0);
588 
589 			if (((pglob->gl_flags & GLOB_MARK) &&
590 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
591 			    || (S_ISLNK(sb.st_mode) &&
592 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
593 			    S_ISDIR(sb.st_mode)))) {
594 				if (pathend + 1 > pathend_last)
595 					return (GLOB_ABORTED);
596 				*pathend++ = SEP;
597 				*pathend = EOS;
598 			}
599 			++pglob->gl_matchc;
600 			return(globextend(pathbuf, pglob, limit));
601 		}
602 
603 		/* Find end of next segment, copy tentatively to pathend. */
604 		q = pathend;
605 		p = pattern;
606 		while (*p != EOS && *p != SEP) {
607 			if (ismeta(*p))
608 				anymeta = 1;
609 			if (q + 1 > pathend_last)
610 				return (GLOB_ABORTED);
611 			*q++ = *p++;
612 		}
613 
614 		if (!anymeta) {		/* No expansion, do next segment. */
615 			pathend = q;
616 			pattern = p;
617 			while (*pattern == SEP) {
618 				if (pathend + 1 > pathend_last)
619 					return (GLOB_ABORTED);
620 				*pathend++ = *pattern++;
621 			}
622 		} else			/* Need expansion, recurse. */
623 			return(glob3(pathbuf, pathend, pathend_last, pattern, p,
624 			    pglob, limit));
625 	}
626 	/* NOTREACHED */
627 }
628 
629 static int
630 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
631       Char *pattern, Char *restpattern,
632       glob_t *pglob, size_t *limit)
633 {
634 	struct dirent *dp;
635 	DIR *dirp;
636 	int err;
637 	char buf[MAXPATHLEN];
638 
639 	/*
640 	 * The readdirfunc declaration can't be prototyped, because it is
641 	 * assigned, below, to two functions which are prototyped in glob.h
642 	 * and dirent.h as taking pointers to differently typed opaque
643 	 * structures.
644 	 */
645 	struct dirent *(*readdirfunc)();
646 
647 	if (pathend > pathend_last)
648 		return (GLOB_ABORTED);
649 	*pathend = EOS;
650 	errno = 0;
651 
652 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
653 		/* TODO: don't call for ENOENT or ENOTDIR? */
654 		if (pglob->gl_errfunc) {
655 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
656 				return (GLOB_ABORTED);
657 			if (pglob->gl_errfunc(buf, errno) ||
658 			    pglob->gl_flags & GLOB_ERR)
659 				return (GLOB_ABORTED);
660 		}
661 		return(0);
662 	}
663 
664 	err = 0;
665 
666 	/* Search directory for matching names. */
667 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
668 		readdirfunc = pglob->gl_readdir;
669 	else
670 		readdirfunc = readdir;
671 	while ((dp = (*readdirfunc)(dirp))) {
672 		char *sc;
673 		Char *dc;
674 		wchar_t wc;
675 		size_t clen;
676 		mbstate_t mbs;
677 
678 		/* Initial DOT must be matched literally. */
679 		if (dp->d_name[0] == DOT && *pattern != DOT)
680 			continue;
681 		memset(&mbs, 0, sizeof(mbs));
682 		dc = pathend;
683 		sc = dp->d_name;
684 		while (dc < pathend_last) {
685 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
686 			if (clen == (size_t)-1 || clen == (size_t)-2) {
687 				wc = *sc;
688 				clen = 1;
689 				memset(&mbs, 0, sizeof(mbs));
690 			}
691 			if ((*dc++ = wc) == EOS)
692 				break;
693 			sc += clen;
694 		}
695 		if (!match(pathend, pattern, restpattern)) {
696 			*pathend = EOS;
697 			continue;
698 		}
699 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
700 		    pglob, limit);
701 		if (err)
702 			break;
703 	}
704 
705 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
706 		(*pglob->gl_closedir)(dirp);
707 	else
708 		closedir(dirp);
709 	return(err);
710 }
711 
712 
713 /*
714  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
715  * add the new item, and update gl_pathc.
716  *
717  * This assumes the BSD realloc, which only copies the block when its size
718  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
719  * behavior.
720  *
721  * Return 0 if new item added, error code if memory couldn't be allocated.
722  *
723  * Invariant of the glob_t structure:
724  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
725  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
726  */
727 static int
728 globextend(const Char *path, glob_t *pglob, size_t *limit)
729 {
730 	char **pathv;
731 	size_t i, newsize, len;
732 	char *copy;
733 	const Char *p;
734 
735 	if (*limit && pglob->gl_pathc > *limit) {
736 		errno = 0;
737 		return (GLOB_NOSPACE);
738 	}
739 
740 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
741 	pathv = pglob->gl_pathv ?
742 		    realloc((char *)pglob->gl_pathv, newsize) :
743 		    malloc(newsize);
744 	if (pathv == NULL) {
745 		if (pglob->gl_pathv) {
746 			free(pglob->gl_pathv);
747 			pglob->gl_pathv = NULL;
748 		}
749 		return(GLOB_NOSPACE);
750 	}
751 
752 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
753 		/* first time around -- clear initial gl_offs items */
754 		pathv += pglob->gl_offs;
755 		for (i = pglob->gl_offs + 1; --i > 0; )
756 			*--pathv = NULL;
757 	}
758 	pglob->gl_pathv = pathv;
759 
760 	for (p = path; *p++;)
761 		continue;
762 	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
763 	if ((copy = malloc(len)) != NULL) {
764 		if (g_Ctoc(path, copy, len)) {
765 			free(copy);
766 			return (GLOB_NOSPACE);
767 		}
768 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
769 	}
770 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
771 	return(copy == NULL ? GLOB_NOSPACE : 0);
772 }
773 
774 /*
775  * pattern matching function for filenames.  Each occurrence of the *
776  * pattern causes a recursion level.
777  */
778 static int
779 match(Char *name, Char *pat, Char *patend)
780 {
781 	int ok, negate_range;
782 	Char c, k;
783 
784 	while (pat < patend) {
785 		c = *pat++;
786 		switch (c & M_MASK) {
787 		case M_ALL:
788 			if (pat == patend)
789 				return(1);
790 			do
791 			    if (match(name, pat, patend))
792 				    return(1);
793 			while (*name++ != EOS);
794 			return(0);
795 		case M_ONE:
796 			if (*name++ == EOS)
797 				return(0);
798 			break;
799 		case M_SET:
800 			ok = 0;
801 			if ((k = *name++) == EOS)
802 				return(0);
803 			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
804 				++pat;
805 			while (((c = *pat++) & M_MASK) != M_END)
806 				if ((*pat & M_MASK) == M_RNG) {
807 					if (
808 #ifndef __HAIKU__
809 					    __collate_load_error ?
810 #endif
811 					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])
812 #ifndef __HAIKU__
813 					    :
814 					       __collate_range_cmp(CHAR(c), CHAR(k)) <= 0
815 					    && __collate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0
816 #endif
817 					   )
818 						ok = 1;
819 					pat += 2;
820 				} else if (c == k)
821 					ok = 1;
822 			if (ok == negate_range)
823 				return(0);
824 			break;
825 		default:
826 			if (*name++ != c)
827 				return(0);
828 			break;
829 		}
830 	}
831 	return(*name == EOS);
832 }
833 
834 /* Free allocated data belonging to a glob_t structure. */
835 void
836 globfree(glob_t *pglob)
837 {
838 	size_t i;
839 	char **pp;
840 
841 	if (pglob->gl_pathv != NULL) {
842 		pp = pglob->gl_pathv + pglob->gl_offs;
843 		for (i = pglob->gl_pathc; i--; ++pp)
844 			if (*pp)
845 				free(*pp);
846 		free(pglob->gl_pathv);
847 		pglob->gl_pathv = NULL;
848 	}
849 }
850 
851 static DIR *
852 g_opendir(Char *str, glob_t *pglob)
853 {
854 	char buf[MAXPATHLEN];
855 
856 	if (!*str)
857 		strcpy(buf, ".");
858 	else {
859 		if (g_Ctoc(str, buf, sizeof(buf)))
860 			return (NULL);
861 	}
862 
863 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
864 		return((*pglob->gl_opendir)(buf));
865 
866 	return(opendir(buf));
867 }
868 
869 static int
870 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
871 {
872 	char buf[MAXPATHLEN];
873 
874 	if (g_Ctoc(fn, buf, sizeof(buf))) {
875 		errno = ENAMETOOLONG;
876 		return (-1);
877 	}
878 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
879 		return((*pglob->gl_lstat)(buf, sb));
880 	return(lstat(buf, sb));
881 }
882 
883 static int
884 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
885 {
886 	char buf[MAXPATHLEN];
887 
888 	if (g_Ctoc(fn, buf, sizeof(buf))) {
889 		errno = ENAMETOOLONG;
890 		return (-1);
891 	}
892 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
893 		return((*pglob->gl_stat)(buf, sb));
894 	return(stat(buf, sb));
895 }
896 
897 static const Char *
898 g_strchr(const Char *str, wchar_t ch)
899 {
900 
901 	do {
902 		if (*str == ch)
903 			return (str);
904 	} while (*str++);
905 	return (NULL);
906 }
907 
908 static int
909 g_Ctoc(const Char *str, char *buf, size_t len)
910 {
911 	mbstate_t mbs;
912 	size_t clen;
913 
914 	memset(&mbs, 0, sizeof(mbs));
915 	while (len >= MB_CUR_MAX) {
916 		clen = wcrtomb(buf, *str, &mbs);
917 		if (clen == (size_t)-1)
918 			return (1);
919 		if (*str == L'\0')
920 			return (0);
921 		str++;
922 		buf += clen;
923 		len -= clen;
924 	}
925 	return (1);
926 }
927 
928 #ifdef DEBUG
929 static void
930 qprintf(const char *str, Char *s)
931 {
932 	Char *p;
933 
934 	(void)printf("%s:\n", str);
935 	for (p = s; *p; p++)
936 		(void)printf("%c", CHAR(*p));
937 	(void)printf("\n");
938 	for (p = s; *p; p++)
939 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
940 	(void)printf("\n");
941 	for (p = s; *p; p++)
942 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
943 	(void)printf("\n");
944 }
945 #endif
946