xref: /haiku/src/system/libroot/posix/glibc/regex/regex.c (revision b671e9bbdbd10268a042b4f4cc4317ccd03d105e)
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P1003.2/D11.2, except for some of the
4    internationalization features.)
5    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
6 
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Library General Public License as
9    published by the Free Software Foundation; either version 2 of the
10    License, or (at your option) any later version.
11 
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Library General Public License for more details.
16 
17    You should have received a copy of the GNU Library General Public
18    License along with the GNU C Library; see the file COPYING.LIB.  If not,
19    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20    Boston, MA 02111-1307, USA.  */
21 
22 /* AIX requires this to be the first thing in the file. */
23 #if defined _AIX && !defined REGEX_MALLOC
24   #pragma alloca
25 #endif
26 
27 #undef	_GNU_SOURCE
28 #define _GNU_SOURCE
29 
30 #ifndef PARAMS
31 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
32 #  define PARAMS(args) args
33 # else
34 #  define PARAMS(args) ()
35 # endif  /* GCC.  */
36 #endif  /* Not PARAMS.  */
37 
38 #if defined STDC_HEADERS && !defined emacs
39 # include <stddef.h>
40 #else
41 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
42 # include <sys/types.h>
43 #endif
44 
45 #define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
46 
47 /* For platforms which support the ISO C Amendment 1 functionality we
48    support user-defined character classes.  */
49 #if defined _LIBC || WIDE_CHAR_SUPPORT
50 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
51 # include <wchar.h>
52 # include <wctype.h>
53 #endif
54 
55 /* This is for multi byte string support.  */
#ifdef MBS_SUPPORT
/* Wide-character build: pattern elements are wchar_t and a stored
   number occupies a single element.  */
# define CHAR_TYPE wchar_t
# define US_CHAR_TYPE wchar_t /* unsigned character type */
# define COMPILED_BUFFER_VAR wc_buffer
# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_TYPE)+1)
/* Print one character C on stdout.  The argument is parenthesized before
   the cast so that a compound expression is converted as a whole.  */
# define PUT_CHAR(c) \
  do {									      \
    if (MB_CUR_MAX == 1)						      \
      putchar (c);							      \
    else								      \
      printf ("%C", (wint_t) (c)); /* Should we use wide stream??  */	      \
  } while (0)
# define TRUE 1
# define FALSE 0
#else
/* Single-byte build: pattern elements are plain chars and a stored
   number occupies two bytes.  */
# define CHAR_TYPE char
# define US_CHAR_TYPE unsigned char /* unsigned character type */
# define COMPILED_BUFFER_VAR bufp->buffer
# define OFFSET_ADDRESS_SIZE 2
# define PUT_CHAR(c) putchar (c)
#endif /* MBS_SUPPORT */
78 
79 #ifdef _LIBC
80 /* We have to keep the namespace clean.  */
81 # define regfree(preg) __regfree (preg)
82 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
83 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
84 # define regerror(errcode, preg, errbuf, errbuf_size) \
85 	__regerror(errcode, preg, errbuf, errbuf_size)
86 # define re_set_registers(bu, re, nu, st, en) \
87 	__re_set_registers (bu, re, nu, st, en)
88 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
89 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
90 # define re_match(bufp, string, size, pos, regs) \
91 	__re_match (bufp, string, size, pos, regs)
92 # define re_search(bufp, string, size, startpos, range, regs) \
93 	__re_search (bufp, string, size, startpos, range, regs)
94 # define re_compile_pattern(pattern, length, bufp) \
95 	__re_compile_pattern (pattern, length, bufp)
96 # define re_set_syntax(syntax) __re_set_syntax (syntax)
97 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
98 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
99 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
100 
101 # define btowc __btowc
102 
103 /* We are also using some library internals.  */
104 # include <locale/localeinfo.h>
105 # include <locale/elem-hash.h>
106 # include <langinfo.h>
107 # include <locale/coll-lookup.h>
108 #endif
109 
110 /* This is for other GNU distributions with internationalized messages.  */
111 #if HAVE_LIBINTL_H || defined _LIBC
112 # include <libintl.h>
113 # ifdef _LIBC
114 #  undef gettext
115 #  define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
116 # endif
117 #else
118 # define gettext(msgid) (msgid)
119 #endif
120 
121 #ifndef gettext_noop
122 /* This define is so xgettext can find the internationalizable
123    strings.  */
124 # define gettext_noop(String) String
125 #endif
126 
127 /* The `emacs' switch turns on certain matching commands
128    that make sense only in Emacs. */
129 #ifdef emacs
130 
131 # include "lisp.h"
132 # include "buffer.h"
133 # include "syntax.h"
134 
135 #else  /* not emacs */
136 
137 /* If we are not linking with Emacs proper,
138    we can't use the relocating allocator
139    even if config.h says that we can.  */
140 # undef REL_ALLOC
141 
142 #include <stdlib.h>
143 
144 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
145    If nothing else has been done, use the method below.  */
146 # ifdef INHIBIT_STRING_HEADER
147 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
148 #   if !defined bzero && !defined bcopy
149 #    undef INHIBIT_STRING_HEADER
150 #   endif
151 #  endif
152 # endif
153 
154 /* This is the normal way of making sure we have a bcopy and a bzero.
155    This is used in most programs--a few other programs avoid this
156    by defining INHIBIT_STRING_HEADER.  */
157 # ifndef INHIBIT_STRING_HEADER
158 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
159 #   include <string.h>
160 #   ifndef bzero
161 #    ifndef _LIBC
162 #     define bzero(s, n)	(memset (s, '\0', n), (s))
163 #    else
164 #     define bzero(s, n)	__bzero (s, n)
165 #    endif
166 #   endif
167 #  else
168 #   include <strings.h>
169 #   ifndef memcmp
170 #    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
171 #   endif
172 #   ifndef memcpy
173 #    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
174 #   endif
175 #  endif
176 # endif
177 
178 /* Define the syntax stuff for \<, \>, etc.  */
179 
180 /* This must be nonzero for the wordchar and notwordchar pattern
181    commands in re_match_2.  */
182 # ifndef Sword
183 #  define Sword 1
184 # endif
185 
186 # ifdef SWITCH_ENUM_BUG
187 #  define SWITCH_ENUM_CAST(x) ((int)(x))
188 # else
189 #  define SWITCH_ENUM_CAST(x) (x)
190 # endif
191 
192 #endif /* not emacs */
193 
194 #if defined _LIBC || HAVE_LIMITS_H
195 # include <limits.h>
196 #endif
197 
198 #ifndef MB_LEN_MAX
199 # define MB_LEN_MAX 1
200 #endif
201 
202 /* Get the interface, including the syntax bits.  */
203 #include <regex.h>
204 
205 /* isalpha etc. are used for the character classes.  */
206 #include <ctype.h>
207 
208 /* Jim Meyering writes:
209 
210    "... Some ctype macros are valid only for character codes that
211    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
212    using /bin/cc or gcc but without giving an ansi option).  So, all
213    ctype uses should be through macros like ISPRINT...  If
214    STDC_HEADERS is defined, then autoconf has verified that the ctype
215    macros don't need to be guarded with references to isascii. ...
216    Defining isascii to 1 should let any compiler worth its salt
217    eliminate the && through constant folding."
218    Solaris defines some of these symbols so we must undefine them first.  */
219 
220 #undef ISASCII
221 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
222 # define ISASCII(c) 1
223 #else
224 # define ISASCII(c) isascii(c)
225 #endif
226 
227 #ifdef isblank
228 # define ISBLANK(c) (ISASCII (c) && isblank (c))
229 #else
230 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
231 #endif
232 #ifdef isgraph
233 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
234 #else
235 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
236 #endif
237 
238 #undef ISPRINT
239 #define ISPRINT(c) (ISASCII (c) && isprint (c))
240 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
241 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
242 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
243 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
244 #define ISLOWER(c) (ISASCII (c) && islower (c))
245 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
246 #define ISSPACE(c) (ISASCII (c) && isspace (c))
247 #define ISUPPER(c) (ISASCII (c) && isupper (c))
248 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
249 
250 #ifdef _tolower
251 # define TOLOWER(c) _tolower(c)
252 #else
253 # define TOLOWER(c) tolower(c)
254 #endif
255 
/* Fallback definition of NULL for environments whose headers lack one.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression (e.g. under `sizeof').  */
#ifndef NULL
# define NULL ((void *) 0)
#endif
259 
260 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
261    since ours (we hope) works properly with all combinations of
262    machines, compilers, `char' and `unsigned char' argument types.
263    (Per Bothner suggested the basic approach.)  */
264 #undef SIGN_EXTEND_CHAR
265 #if __STDC__
266 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
267 #else  /* not __STDC__ */
268 /* As in Harbison and Steele.  */
269 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
270 #endif
271 
272 #ifndef emacs
273 /* How many characters in the character set.  */
274 # define CHAR_SET_SIZE 256
275 
276 # ifdef SYNTAX_TABLE
277 
278 extern char *re_syntax_table;
279 
280 # else /* not SYNTAX_TABLE */
281 
282 static char re_syntax_table[CHAR_SET_SIZE];
283 
284 static void init_syntax_once PARAMS ((void));
285 
286 static void
287 init_syntax_once ()
288 {
289    register int c;
290    static int done = 0;
291 
292    if (done)
293      return;
294    bzero (re_syntax_table, sizeof re_syntax_table);
295 
296    for (c = 0; c < CHAR_SET_SIZE; ++c)
297      if (ISALNUM (c))
298 	re_syntax_table[c] = Sword;
299 
300    re_syntax_table['_'] = Sword;
301 
302    done = 1;
303 }
304 
305 # endif /* not SYNTAX_TABLE */
306 
307 # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
308 
309 #endif /* emacs */
310 
311 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
312    use `alloca' instead of `malloc'.  This is because using malloc in
313    re_search* or re_match* could cause memory leaks when C-g is used in
314    Emacs; also, malloc is slower and causes storage fragmentation.  On
315    the other hand, malloc is more portable, and easier to debug.
316 
317    Because we sometimes use alloca, some routines have to be macros,
318    not functions -- `alloca'-allocated space disappears at the end of the
319    function it is called in.  */
320 
321 #ifdef REGEX_MALLOC
322 
323 # define REGEX_ALLOCATE malloc
324 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
325 # define REGEX_FREE free
326 
327 #else /* not REGEX_MALLOC  */
328 
329 /* Emacs already defines alloca, sometimes.  */
330 # ifndef alloca
331 
332 /* Make alloca work the best possible way.  */
333 #  ifdef __GNUC__
334 #   define alloca __builtin_alloca
335 #  else /* not __GNUC__ */
336 #   if HAVE_ALLOCA_H
337 #    include <alloca.h>
338 #   endif /* HAVE_ALLOCA_H */
339 #  endif /* not __GNUC__ */
340 
341 # endif /* not alloca */
342 
343 # define REGEX_ALLOCATE alloca
344 
345 /* Assumes a `char *destination' variable.  */
346 # define REGEX_REALLOCATE(source, osize, nsize)				\
347   (destination = (char *) alloca (nsize),				\
348    memcpy (destination, source, osize))
349 
350 /* No need to do anything to free, after alloca.  */
351 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
352 
353 #endif /* not REGEX_MALLOC */
354 
355 /* Define how to allocate the failure stack.  */
356 
357 #if defined REL_ALLOC && defined REGEX_MALLOC
358 
359 # define REGEX_ALLOCATE_STACK(size)				\
360   r_alloc (&failure_stack_ptr, (size))
361 # define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
362   r_re_alloc (&failure_stack_ptr, (nsize))
363 # define REGEX_FREE_STACK(ptr)					\
364   r_alloc_free (&failure_stack_ptr)
365 
366 #else /* not using relocating allocator */
367 
368 # ifdef REGEX_MALLOC
369 
370 #  define REGEX_ALLOCATE_STACK malloc
371 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
372 #  define REGEX_FREE_STACK free
373 
374 # else /* not REGEX_MALLOC */
375 
376 #  define REGEX_ALLOCATE_STACK alloca
377 
378 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
379    REGEX_REALLOCATE (source, osize, nsize)
380 /* No need to explicitly free anything.  */
381 #  define REGEX_FREE_STACK(arg)
382 
383 # endif /* not REGEX_MALLOC */
384 #endif /* not using relocating allocator */
385 
386 
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  Relies on `size1' and `string1' being in scope at the
   point of use.  */
#define FIRST_STRING_P(ptr) 					\
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Reallocate ADDR if it is already set, otherwise allocate it afresh.
   Wrapped in do ... while (0) so the macro is a single statement and
   cannot capture or break an `else' belonging to the caller.  */
#define RETALLOC_IF(addr, n, t) \
  do {									\
    if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t);	\
  } while (0)
/* Like TALLOC, but through REGEX_ALLOCATE.  */
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

#define BYTEWIDTH 8 /* In bits.  */

#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Note: both macros evaluate their arguments more than once.  */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

typedef char boolean;
#define false 0
#define true 1
412 
413 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
414 					const char *string1, int size1,
415 					const char *string2, int size2,
416 					int pos,
417 					struct re_registers *regs,
418 					int stop));
419 
420 /* These are the command codes that appear in compiled regular
421    expressions.  Some opcodes are followed by argument bytes.  A
422    command code can specify any interpretation whatsoever for its
423    arguments.  Zero bytes may appear in the compiled regular expression.  */
424 
typedef enum
{
  no_op = 0,

  /* Succeed immediately; no more backtracking is done.  */
  succeed,

  /* One byte N follows, then N literal bytes.  */
  exactn,

#ifdef MBS_SUPPORT
  /* Like exactn, but the data that follows is binary.  */
  exactn_bin,
#endif

  /* Matches (more or less) any character.  */
  anychar,

  /* Matches one character that belongs to the given set.  The first
     byte after the opcode is the number of bitmap bytes; the bitmap
     itself follows.  Within each byte the bits are ordered
     low-bit-first, and a character is a member when its bit is 1.
     A character too large to have a bit in the map is never a member.

     With MBS_SUPPORT the opcode is instead followed by the lengths of
     the character classes, collating symbols, equivalence classes,
     character ranges and characters, and then by the character class
     elements, collating symbol elements, equivalence class elements,
     range elements and character elements, in that order.
     See the regex_compile function.  */
  charset,

  /* Takes the same parameters as charset, but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering the matched text so it can be stored in a
     register.  One byte follows with the register number, in the range
     0 to one less than the pattern buffer's re_nsub field; then one
     byte with the number of groups nested inside this one.  (That
     second byte lives in start_memory only because the
     on_failure_jump of re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering the matched text and store it in a memory
     register.  One byte follows with the register number, in the range
     0 to one less than `re_nsub' in the pattern buffer, and then one
     byte with the number of inner groups, exactly as for
     `start_memory'.  (The inner-group count is repeated here because
     there is no easy way to find the matching start_memory from a
     stop_memory.)  */
  stop_memory,

  /* Match a copy of something remembered earlier.  One byte follows
     holding the register number.  */
  duplicate,

  /* Fail unless at the beginning of a line.  */
  begline,

  /* Fail unless at the end of a line.  */
  endline,

  /* Succeeds at the beginning of the buffer (if emacs) or at the
     beginning of the string to be matched (if not).  */
  begbuf,

  /* The same, for the end of the buffer/string.  */
  endbuf,

  /* A two-byte relative address to jump to follows.  */
  jump,

  /* Like jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* A two-byte relative address follows; matching resumes there in
     case of failure.
     With MBS_SUPPORT the size of the address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, except that a placeholder is pushed instead
     of the current string position when it is executed.  */
  on_failure_keep_string_jump,

  /* Discard the latest failure point, then jump to the two-byte
     relative address that follows.
     With MBS_SUPPORT the size of the address is 1.  */
  pop_failure_jump,

  /* Jump back to the beginning of a repeat.  This is changed to
     pop_failure_jump if we know we won't have to backtrack in order to
     match, and to jump otherwise: when what follows this jump clearly
     cannot match what the repeat matches, there is no use backtracking
     out of repetitions already matched, so it becomes a
     pop_failure_jump.  A two-byte address follows.
     With MBS_SUPPORT the size of the address is 1.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy failure
     point, which is thrown away if an attempt is made to use it for a
     failure.  A `+' construct makes this before the first repeat.  It
     is also used as an intermediary kind of jump when compiling an
     alternative.
     With MBS_SUPPORT the size of the address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* A two-byte relative address and a two-byte number N follow.
     After matching N times, jump to the address upon failure.
     With MBS_SUPPORT the size of the address is 1.  */
  succeed_n,

  /* A two-byte relative address and a two-byte number N follow.
     Jump to the address N times, then fail.
     With MBS_SUPPORT the size of the address is 1.  */
  jump_n,

  /* Set the location at the following two-byte relative address to
     the two-byte number that comes after it.  The address *includes*
     the two bytes of the number.
     With MBS_SUPPORT the size of the address is 1.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any character that is not a word-constituent.  */
  notwordchar,

  /* Succeeds if at a word beginning.  */
  wordbeg,

  /* Succeeds if at a word end.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  */
  notwordbound

#ifdef emacs
  /* Succeeds if before point.  */
  ,before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is the one specified.  A byte
     follows that contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not the one specified.  */
  notsyntaxspec
#endif /* emacs */
} re_opcode_t;
577 
578 /* Common operations on the compiled pattern.  */
579 
/* Store NUMBER in two contiguous bytes starting at DESTINATION
   (low-order byte first).  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

#ifdef MBS_SUPPORT
# define STORE_NUMBER(destination, number)				\
  do {									\
    *(destination) = (US_CHAR_TYPE)(number);				\
  } while (0)
#else
# define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)
#endif /* MBS_SUPPORT */

/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += OFFSET_ADDRESS_SIZE;				\
  } while (0)

/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The high-order byte is sign-extended and scaled by
   multiplication rather than by `<< 8', because left-shifting a
   negative value is undefined behavior.  */
/* ifdef MBS_SUPPORT, the number is read from 1 element.  */

#ifdef MBS_SUPPORT
# define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
#else
# define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
#endif
623 
624 #ifdef DEBUG
625 static void extract_number _RE_ARGS ((int *dest, US_CHAR_TYPE *source));
626 static void
627 extract_number (dest, source)
628     int *dest;
629     US_CHAR_TYPE *source;
630 {
631 #ifdef MBS_SUPPORT
632   *dest = *source;
633 #else
634   int temp = SIGN_EXTEND_CHAR (*(source + 1));
635   *dest = *source & 0377;
636   *dest += temp << 8;
637 #endif
638 }
639 
640 # ifndef EXTRACT_MACROS /* To debug the macros.  */
641 #  undef EXTRACT_NUMBER
642 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
643 # endif /* not EXTRACT_MACROS */
644 
645 #endif /* DEBUG */
646 
647 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
648    SOURCE must be an lvalue.  */
649 
650 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
651   do {									\
652     EXTRACT_NUMBER (destination, source);				\
653     (source) += OFFSET_ADDRESS_SIZE; 					\
654   } while (0)
655 
656 #ifdef DEBUG
657 static void extract_number_and_incr _RE_ARGS ((int *destination,
658 					       US_CHAR_TYPE **source));
659 static void
660 extract_number_and_incr (destination, source)
661     int *destination;
662     US_CHAR_TYPE **source;
663 {
664   extract_number (destination, *source);
665   *source += OFFSET_ADDRESS_SIZE;
666 }
667 
668 # ifndef EXTRACT_MACROS
669 #  undef EXTRACT_NUMBER_AND_INCR
670 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
671   extract_number_and_incr (&dest, &src)
672 # endif /* not EXTRACT_MACROS */
673 
674 #endif /* DEBUG */
675 
676 /* If DEBUG is defined, Regex prints many voluminous messages about what
677    it is doing (if the variable `debug' is nonzero).  If linked with the
678    main program in `iregex.c', you can enter patterns and strings
679    interactively.  And if linked with the main program in `main.c' and
680    the other test files, you can run the already-written tests.  */
681 
682 #ifdef DEBUG
683 
684 /* We use standard I/O for debugging.  */
685 # include <stdio.h>
686 
687 /* It is useful to test things that ``must'' be true when debugging.  */
688 # include <assert.h>
689 
/* Debug output is produced only while this is nonzero.  */
static int debug;

/* Each printing macro is wrapped in do ... while (0) so that it forms
   exactly one statement; a bare `if (debug) ...' would silently capture
   an `else' that follows the macro in the caller's code.  */
# define DEBUG_STATEMENT(e) e
# define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
# define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
# define DEBUG_PRINT3(x1, x2, x3)					\
  do { if (debug) printf (x1, x2, x3); } while (0)
# define DEBUG_PRINT4(x1, x2, x3, x4)					\
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
701 
702 
703 /* Print the fastmap in human-readable form.  */
704 
705 void
706 print_fastmap (fastmap)
707     char *fastmap;
708 {
709   unsigned was_a_range = 0;
710   unsigned i = 0;
711 
712   while (i < (1 << BYTEWIDTH))
713     {
714       if (fastmap[i++])
715 	{
716 	  was_a_range = 0;
717           putchar (i - 1);
718           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
719             {
720               was_a_range = 1;
721               i++;
722             }
723 	  if (was_a_range)
724             {
725               printf ("-");
726               putchar (i - 1);
727             }
728         }
729     }
730   putchar ('\n');
731 }
732 
733 
734 /* Print a compiled pattern string in human-readable form, starting at
735    the START pointer into it and ending just before the pointer END.  */
736 
737 void
738 print_partial_compiled_pattern (start, end)
739     US_CHAR_TYPE *start;
740     US_CHAR_TYPE *end;
741 {
742   int mcnt, mcnt2;
743   US_CHAR_TYPE *p1;
744   US_CHAR_TYPE *p = start;
745   US_CHAR_TYPE *pend = end;
746 
747   if (start == NULL)
748     {
749       printf ("(null)\n");
750       return;
751     }
752 
753   /* Loop over pattern commands.  */
754   while (p < pend)
755     {
756 #ifdef _LIBC
757       printf ("%td:\t", p - start);
758 #else
759       printf ("%ld:\t", (long int) (p - start));
760 #endif
761 
762       switch ((re_opcode_t) *p++)
763 	{
764         case no_op:
765           printf ("/no_op");
766           break;
767 
768 	case exactn:
769 	  mcnt = *p++;
770           printf ("/exactn/%d", mcnt);
771           do
772 	    {
773               putchar ('/');
774 	      PUT_CHAR (*p++);
775             }
776           while (--mcnt);
777           break;
778 
779 #ifdef MBS_SUPPORT
780 	case exactn_bin:
781 	  mcnt = *p++;
782 	  printf ("/exactn_bin/%d", mcnt);
783           do
784 	    {
785 	      printf("/%lx", (long int) *p++);
786             }
787           while (--mcnt);
788           break;
789 #endif /* MBS_SUPPORT */
790 
791 	case start_memory:
792           mcnt = *p++;
793           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
794           break;
795 
796 	case stop_memory:
797           mcnt = *p++;
798 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
799           break;
800 
801 	case duplicate:
802 	  printf ("/duplicate/%ld", (long int) *p++);
803 	  break;
804 
805 	case anychar:
806 	  printf ("/anychar");
807 	  break;
808 
809 	case charset:
810         case charset_not:
811           {
812 #ifdef MBS_SUPPORT
813 	    int i, length;
814 	    wchar_t *workp = p;
815 	    printf ("/charset [%s",
816 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
817 	    p += 5;
818 	    length = *workp++; /* the length of char_classes */
819 	    for (i=0 ; i<length ; i++)
820 	      printf("[:%lx:]", (long int) *p++);
821 	    length = *workp++; /* the length of collating_symbol */
822 	    for (i=0 ; i<length ;)
823 	      {
824 		printf("[.");
825 		while(*p != 0)
826 		  PUT_CHAR((i++,*p++));
827 		i++,p++;
828 		printf(".]");
829 	      }
830 	    length = *workp++; /* the length of equivalence_class */
831 	    for (i=0 ; i<length ;)
832 	      {
833 		printf("[=");
834 		while(*p != 0)
835 		  PUT_CHAR((i++,*p++));
836 		i++,p++;
837 		printf("=]");
838 	      }
839 	    length = *workp++; /* the length of char_range */
840 	    for (i=0 ; i<length ; i++)
841 	      {
842 		wchar_t range_start = *p++;
843 		wchar_t range_end = *p++;
844 		if (MB_CUR_MAX == 1)
845 		  printf("%c-%c", (char) range_start, (char) range_end);
846 		else
847 		  printf("%C-%C", (wint_t) range_start, (wint_t) range_end);
848 	      }
849 	    length = *workp++; /* the length of char */
850 	    for (i=0 ; i<length ; i++)
851 	      if (MB_CUR_MAX == 1)
852 		putchar (*p++);
853 	      else
854 		printf("%C", (wint_t) *p++);
855 	    putchar (']');
856 #else
857             register int c, last = -100;
858 	    register int in_range = 0;
859 
860 	    printf ("/charset [%s",
861 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
862 
863             assert (p + *p < pend);
864 
865             for (c = 0; c < 256; c++)
866 	      if (c / 8 < *p
867 		  && (p[1 + (c/8)] & (1 << (c % 8))))
868 		{
869 		  /* Are we starting a range?  */
870 		  if (last + 1 == c && ! in_range)
871 		    {
872 		      putchar ('-');
873 		      in_range = 1;
874 		    }
875 		  /* Have we broken a range?  */
876 		  else if (last + 1 != c && in_range)
877               {
878 		      putchar (last);
879 		      in_range = 0;
880 		    }
881 
882 		  if (! in_range)
883 		    putchar (c);
884 
885 		  last = c;
886               }
887 
888 	    if (in_range)
889 	      putchar (last);
890 
891 	    putchar (']');
892 
893 	    p += 1 + *p;
894 #endif /* MBS_SUPPORT */
895 	  }
896 	  break;
897 
898 	case begline:
899 	  printf ("/begline");
900           break;
901 
902 	case endline:
903           printf ("/endline");
904           break;
905 
906 	case on_failure_jump:
907           extract_number_and_incr (&mcnt, &p);
908 #ifdef _LIBC
909   	  printf ("/on_failure_jump to %td", p + mcnt - start);
910 #else
911   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
912 #endif
913           break;
914 
915 	case on_failure_keep_string_jump:
916           extract_number_and_incr (&mcnt, &p);
917 #ifdef _LIBC
918   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
919 #else
920   	  printf ("/on_failure_keep_string_jump to %ld",
921 		  (long int) (p + mcnt - start));
922 #endif
923           break;
924 
925 	case dummy_failure_jump:
926           extract_number_and_incr (&mcnt, &p);
927 #ifdef _LIBC
928   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
929 #else
930   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
931 #endif
932           break;
933 
934 	case push_dummy_failure:
935           printf ("/push_dummy_failure");
936           break;
937 
938         case maybe_pop_jump:
939           extract_number_and_incr (&mcnt, &p);
940 #ifdef _LIBC
941   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
942 #else
943   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
944 #endif
945 	  break;
946 
947         case pop_failure_jump:
948 	  extract_number_and_incr (&mcnt, &p);
949 #ifdef _LIBC
950   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
951 #else
952   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
953 #endif
954 	  break;
955 
956         case jump_past_alt:
957 	  extract_number_and_incr (&mcnt, &p);
958 #ifdef _LIBC
959   	  printf ("/jump_past_alt to %td", p + mcnt - start);
960 #else
961   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
962 #endif
963 	  break;
964 
965         case jump:
966 	  extract_number_and_incr (&mcnt, &p);
967 #ifdef _LIBC
968   	  printf ("/jump to %td", p + mcnt - start);
969 #else
970   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
971 #endif
972 	  break;
973 
974         case succeed_n:
975           extract_number_and_incr (&mcnt, &p);
976 	  p1 = p + mcnt;
977           extract_number_and_incr (&mcnt2, &p);
978 #ifdef _LIBC
979 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
980 #else
981 	  printf ("/succeed_n to %ld, %d times",
982 		  (long int) (p1 - start), mcnt2);
983 #endif
984           break;
985 
986         case jump_n:
987           extract_number_and_incr (&mcnt, &p);
988 	  p1 = p + mcnt;
989           extract_number_and_incr (&mcnt2, &p);
990 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
991           break;
992 
993         case set_number_at:
994           extract_number_and_incr (&mcnt, &p);
995 	  p1 = p + mcnt;
996           extract_number_and_incr (&mcnt2, &p);
997 #ifdef _LIBC
998 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
999 #else
1000 	  printf ("/set_number_at location %ld to %d",
1001 		  (long int) (p1 - start), mcnt2);
1002 #endif
1003           break;
1004 
1005         case wordbound:
1006 	  printf ("/wordbound");
1007 	  break;
1008 
1009 	case notwordbound:
1010 	  printf ("/notwordbound");
1011           break;
1012 
1013 	case wordbeg:
1014 	  printf ("/wordbeg");
1015 	  break;
1016 
1017 	case wordend:
1018 	  printf ("/wordend");
1019 	  break;
1020 
1021 # ifdef emacs
1022 	case before_dot:
1023 	  printf ("/before_dot");
1024           break;
1025 
1026 	case at_dot:
1027 	  printf ("/at_dot");
1028           break;
1029 
1030 	case after_dot:
1031 	  printf ("/after_dot");
1032           break;
1033 
1034 	case syntaxspec:
1035           printf ("/syntaxspec");
1036 	  mcnt = *p++;
1037 	  printf ("/%d", mcnt);
1038           break;
1039 
1040 	case notsyntaxspec:
1041           printf ("/notsyntaxspec");
1042 	  mcnt = *p++;
1043 	  printf ("/%d", mcnt);
1044 	  break;
1045 # endif /* emacs */
1046 
1047 	case wordchar:
1048 	  printf ("/wordchar");
1049           break;
1050 
1051 	case notwordchar:
1052 	  printf ("/notwordchar");
1053           break;
1054 
1055 	case begbuf:
1056 	  printf ("/begbuf");
1057           break;
1058 
1059 	case endbuf:
1060 	  printf ("/endbuf");
1061           break;
1062 
1063         default:
1064           printf ("?%ld", (long int) *(p-1));
1065 	}
1066 
1067       putchar ('\n');
1068     }
1069 
1070 #ifdef _LIBC
1071   printf ("%td:\tend of pattern.\n", p - start);
1072 #else
1073   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1074 #endif
1075 }
1076 
1077 
1078 void
1079 print_compiled_pattern (bufp)
1080     struct re_pattern_buffer *bufp;
1081 {
1082   US_CHAR_TYPE *buffer = (US_CHAR_TYPE*) bufp->buffer;
1083 
1084   print_partial_compiled_pattern (buffer, buffer
1085 				  + bufp->used / sizeof(US_CHAR_TYPE));
1086   printf ("%ld bytes used/%ld bytes allocated.\n",
1087 	  bufp->used, bufp->allocated);
1088 
1089   if (bufp->fastmap_accurate && bufp->fastmap)
1090     {
1091       printf ("fastmap: ");
1092       print_fastmap (bufp->fastmap);
1093     }
1094 
1095 #ifdef _LIBC
1096   printf ("re_nsub: %Zd\t", bufp->re_nsub);
1097 #else
1098   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1099 #endif
1100   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1101   printf ("can_be_null: %d\t", bufp->can_be_null);
1102   printf ("newline_anchor: %d\n", bufp->newline_anchor);
1103   printf ("no_sub: %d\t", bufp->no_sub);
1104   printf ("not_bol: %d\t", bufp->not_bol);
1105   printf ("not_eol: %d\t", bufp->not_eol);
1106   printf ("syntax: %lx\n", bufp->syntax);
1107   /* Perhaps we should print the translate table?  */
1108 }
1109 
1110 
1111 void
1112 print_double_string (where, string1, size1, string2, size2)
1113     const CHAR_TYPE *where;
1114     const CHAR_TYPE *string1;
1115     const CHAR_TYPE *string2;
1116     int size1;
1117     int size2;
1118 {
1119   int this_char;
1120 
1121   if (where == NULL)
1122     printf ("(null)");
1123   else
1124     {
1125       if (FIRST_STRING_P (where))
1126         {
1127           for (this_char = where - string1; this_char < size1; this_char++)
1128 	    PUT_CHAR (string1[this_char]);
1129 
1130           where = string2;
1131         }
1132 
1133       for (this_char = where - string2; this_char < size2; this_char++)
1134         PUT_CHAR (string2[this_char]);
1135     }
1136 }
1137 
/* Write the single character C to stderr.  */
void
printchar (c)
     int c;
{
  fputc (c, stderr);
}
1144 
1145 #else /* not DEBUG */
1146 
/* DEBUG is off: assertions and every debugging hook expand to nothing,
   so the places that use them need no conditional compilation of their
   own.  */
# undef assert
# define assert(e)

# define DEBUG_STATEMENT(e)
# define DEBUG_PRINT1(x)
# define DEBUG_PRINT2(x1, x2)
# define DEBUG_PRINT3(x1, x2, x3)
# define DEBUG_PRINT4(x1, x2, x3, x4)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1157 
1158 #endif /* not DEBUG */
1159 
1160 #ifdef MBS_SUPPORT
/* Convert a multibyte string to a wide character string.  The
   correspondence between the two is written to offset_buffer (see
   below), and is_binary records whether each wchar_t is binary data.
   Invalid multibyte sequences are treated as binary data.
   offset_buffer and is_binary are assumed to have been allocated
   with enough space already.  */
1167 
1168 static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src,
1169 				  size_t len, int *offset_buffer,
1170 				  char *is_binary);
1171 static size_t
1172 convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
1173      CHAR_TYPE *dest;
1174      const unsigned char* src;
1175      size_t len; /* the length of multibyte string.  */
1176 
1177      /* It hold correspondances between src(char string) and
1178 	dest(wchar_t string) for optimization.
1179 	e.g. src  = "xxxyzz"
1180              dest = {'X', 'Y', 'Z'}
1181 	      (each "xxx", "y" and "zz" represent one multibyte character
1182 	       corresponding to 'X', 'Y' and 'Z'.)
1183 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1184 	  	        = {0, 3, 4, 6}
1185      */
1186      int *offset_buffer;
1187      char *is_binary;
1188 {
1189   wchar_t *pdest = dest;
1190   const unsigned char *psrc = src;
1191   size_t wc_count = 0;
1192 
1193   if (MB_CUR_MAX == 1)
1194     { /* We don't need conversion.  */
1195       for ( ; wc_count < len ; ++wc_count)
1196 	{
1197 	  *pdest++ = *psrc++;
1198 	  is_binary[wc_count] = FALSE;
1199 	  offset_buffer[wc_count] = wc_count;
1200 	}
1201       offset_buffer[wc_count] = wc_count;
1202     }
1203   else
1204     {
1205       /* We need conversion.  */
1206       mbstate_t mbs;
1207       int consumed;
1208       size_t mb_remain = len;
1209       size_t mb_count = 0;
1210 
1211       /* Initialize the conversion state.  */
1212       memset (&mbs, 0, sizeof (mbstate_t));
1213 
1214       offset_buffer[0] = 0;
1215       for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1216 	     psrc += consumed)
1217 	{
1218 	  consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1219 
1220 	  if (consumed <= 0)
1221 	    /* failed to convert. maybe src contains binary data.
1222 	       So we consume 1 byte manualy.  */
1223 	    {
1224 	      *pdest = *psrc;
1225 	      consumed = 1;
1226 	      is_binary[wc_count] = TRUE;
1227 	    }
1228 	  else
1229 	    is_binary[wc_count] = FALSE;
1230 	  /* In sjis encoding, we use yen sign as escape character in
1231 	     place of reverse solidus. So we convert 0x5c(yen sign in
1232 	     sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1233 	     solidus in UCS2).  */
1234 	  if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1235 	    *pdest = (wchar_t) *psrc;
1236 
1237 	  offset_buffer[wc_count + 1] = mb_count += consumed;
1238 	}
1239     }
1240 
1241   return wc_count;
1242 }
1243 
1244 #endif /* MBS_SUPPORT */
1245 
/* The regexp syntax currently in effect.  `re_set_syntax' stores its
   argument here, but the variable can also be assigned to directly:
   each pattern buffer stores its own syntax, so the value can be
   changed between regex compilations.

   This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;
1252 
1253 
1254 /* Specify the precise syntax of regexps for compilation.  This provides
1255    for compatibility for various utilities which historically have
1256    different, incompatible syntaxes.
1257 
1258    The argument SYNTAX is a bit mask comprised of the various bits
1259    defined in regex.h.  We return the old syntax.  */
1260 
1261 reg_syntax_t
1262 re_set_syntax (syntax)
1263     reg_syntax_t syntax;
1264 {
1265   reg_syntax_t ret = re_syntax_options;
1266 
1267   re_syntax_options = syntax;
1268 #ifdef DEBUG
1269   if (syntax & RE_DEBUG)
1270     debug = 1;
1271   else if (debug) /* was on but now is not */
1272     debug = 0;
1273 #endif /* DEBUG */
1274   return ret;
1275 }
1276 #ifdef _LIBC
1277 weak_alias (__re_set_syntax, re_set_syntax)
1278 #endif
1279 
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?

   All messages live in one character array, separated by explicit
   "\0" pieces that string-literal concatenation glues in between them.
   Each *_IDX macro is the offset of its message inside that array: the
   offset of the preceding message plus the `sizeof' of the preceding
   literal, which already counts that literal's terminating NUL.  A
   message and the literal named in the following `sizeof' must
   therefore always be changed together.  */

static const char re_error_msgid[] =
  {
#define REG_NOERROR_IDX	0
    gettext_noop ("Success")	/* REG_NOERROR */
    "\0"
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match")	/* REG_NOMATCH */
    "\0"
#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
    "\0"
#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
    "\0"
#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
  };
1338 
/* Start offset, within re_error_msgid, of each message, listed in the
   same order as the messages themselves.  Presumably indexed by the
   numeric error code -- confirm against the users of this table.  */
static const size_t re_error_msgid_idx[] =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
1359 
1360 /* Avoiding alloca during matching, to placate r_alloc.  */
1361 
1362 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1363    searching and matching functions should not call alloca.  On some
1364    systems, alloca is implemented in terms of malloc, and if we're
1365    using the relocating allocator routines, then malloc could cause a
1366    relocation, which might (if the strings being searched are in the
1367    ralloc heap) shift the data out from underneath the regexp
1368    routines.
1369 
1370    Here's another reason to avoid allocation: Emacs
1371    processes input from X in a signal handler; processing X input may
1372    call malloc; if input arrives while a matching routine is calling
1373    malloc, then we're scrod.  But Emacs can't just block input while
1374    calling matching routines; then we don't notice interrupts when
1375    they come in.  So, Emacs blocks input around all regexp calls
1376    except the matching calls, which it leaves unprotected, in the
1377    faith that they will not malloc.  */
1378 
/* Normally, this is fine: start out permitting allocation and let the
   check further down withdraw the permission where it is unsafe.  */
#define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
#ifdef __GNUC__
# undef C_ALLOCA
#endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
# undef MATCH_MAY_ALLOCATE
#endif
1396 
1397 
1398 /* Failure stack declarations and macros; both re_compile_fastmap and
1399    re_match_2 use a failure stack.  These have to be macros because of
1400    REGEX_ALLOCATE_STACK.  */
1401 
1402 
/* Number of fail_stack_elt_t slots the failure stack starts out with:
   INIT_FAIL_STACK allocates this many elements.  The stack is grown on
   demand, so this is a starting size and not a hard limit.  A value
   defined before this point takes precedence.  */
#if !defined INIT_FAILURE_ALLOC
# define INIT_FAILURE_ALLOC 5
#endif
1409 
1410 /* Roughly the maximum number of failure points on the stack.  Would be
1411    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1412    This is a variable only so users of regex can assign to it; we never
1413    change it ourselves.  */
1414 
1415 #ifdef INT_IS_16BIT
1416 
1417 # if defined MATCH_MAY_ALLOCATE
1418 /* 4400 was enough to cause a crash on Alpha OSF/1,
1419    whose default stack limit is 2mb.  */
1420 long int re_max_failures = 4000;
1421 # else
1422 long int re_max_failures = 2000;
1423 # endif
1424 
1425 union fail_stack_elt
1426 {
1427   US_CHAR_TYPE *pointer;
1428   long int integer;
1429 };
1430 
1431 typedef union fail_stack_elt fail_stack_elt_t;
1432 
1433 typedef struct
1434 {
1435   fail_stack_elt_t *stack;
1436   unsigned long int size;
1437   unsigned long int avail;		/* Offset of next open position.  */
1438 } fail_stack_type;
1439 
1440 #else /* not INT_IS_16BIT */
1441 
1442 # if defined MATCH_MAY_ALLOCATE
1443 /* 4400 was enough to cause a crash on Alpha OSF/1,
1444    whose default stack limit is 2mb.  */
1445 int re_max_failures = 4000;
1446 # else
1447 int re_max_failures = 2000;
1448 # endif
1449 
1450 union fail_stack_elt
1451 {
1452   US_CHAR_TYPE *pointer;
1453   int integer;
1454 };
1455 
1456 typedef union fail_stack_elt fail_stack_elt_t;
1457 
1458 typedef struct
1459 {
1460   fail_stack_elt_t *stack;
1461   unsigned size;
1462   unsigned avail;			/* Offset of next open position.  */
1463 } fail_stack_type;
1464 
1465 #endif /* INT_IS_16BIT */
1466 
/* Emptiness and fullness tests.  The first and the last look at the
   variable `fail_stack', the middle one at the pointer
   `fail_stack_ptr'.  */
#define FAIL_STACK_EMPTY()     (!fail_stack.avail)
#define FAIL_STACK_PTR_EMPTY() (!fail_stack_ptr->avail)
#define FAIL_STACK_FULL()      (fail_stack.size == fail_stack.avail)
1470 
1471 
/* INIT_FAIL_STACK readies the variable `fail_stack' for use and
   RESET_FAIL_STACK releases it again.

   When the matcher may allocate, INIT_FAIL_STACK obtains room for
   INIT_FAILURE_ALLOC elements and makes the enclosing function do
   `return -2' if that fails.  Otherwise it merely marks the stack as
   empty, and RESET_FAIL_STACK expands to nothing.  */

#ifndef MATCH_MAY_ALLOCATE
# define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;						\
  } while (0)

# define RESET_FAIL_STACK()
#else
# define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
									\
    if (!fail_stack.stack)						\
      return -2;							\
									\
    fail_stack.avail = 0;						\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
  } while (0)

# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
#endif
1497 
1498 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   The limit is tested against the size BEFORE doubling, which is why
   it is only approximate: a stack just below the limit is still
   doubled once more.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   NOTE(review): the result of REGEX_REALLOCATE_STACK is stored straight
   into (fail_stack).stack.  If that macro can map onto realloc, a
   failed call would lose the only pointer to the old block -- confirm
   against its definition.  */

#define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (fail_stack_elt_t *)				\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (fail_stack_elt_t),		\
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
1518 
1519 
/* Push pointer POINTER on FAIL_STACK, doubling the stack first when it
   is full.  Evaluates to 1 if the pointer was pushed and to 0 if the
   stack could not be grown.

   The fullness test is made on the FAIL_STACK argument itself, so the
   macro works for any fail_stack_type lvalue and not only for a
   variable that happens to be called `fail_stack'.  FAIL_STACK is
   evaluated more than once and must be free of side effects.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1529 
/* Primitive pushes onto the failure stack.  All of them assume the
   variable `fail_stack' and do NOT check that a free slot exists, so
   they should probably only be used from within `PUSH_FAILURE_POINT',
   which reserves the room first.  Every expansion is parenthesized so
   that it stays a single operand inside a larger expression.  */

/* Push a pointer value.  */
#define PUSH_FAILURE_POINTER(item)					\
  (fail_stack.stack[fail_stack.avail++].pointer = (US_CHAR_TYPE *) (item))

/* Push an integer-valued item.  */
#define PUSH_FAILURE_INT(item)						\
  (fail_stack.stack[fail_stack.avail++].integer = (item))

/* Push a whole fail_stack_elt_t value.  */
#define PUSH_FAILURE_ELT(item)						\
  (fail_stack.stack[fail_stack.avail++] = (item))

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  */
#define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
#define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
#define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])
1553 
/* The failure point id is pushed and popped only in a DEBUG build; in
   a normal build both operations vanish.  */
#ifndef DEBUG
# define DEBUG_PUSH(item)
# define DEBUG_POP(item_addr)
#else
# define DEBUG_PUSH PUSH_FAILURE_INT
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#endif
1562 
1563 
/* Push the information about the state we will need
   if we ever fail back to it.

   Items are pushed in this order: for every register from
   lowest_active_reg through highest_active_reg its regstart, its
   regend and its reg_info word; then lowest_active_reg,
   highest_active_reg, PATTERN_PLACE and STRING_PLACE; and, in a DEBUG
   build only, the failure id.  The stack is doubled beforehand until
   all of these fit.

   Requires variables fail_stack, regstart, regend, reg_info,
   lowest_active_reg and highest_active_reg be declared, plus (when
   debugging) failure_id, nfailure_points_pushed and num_regs_pushed.
   DOUBLE_FAIL_STACK requires `destination', which is declared here.

   Does `return FAILURE_CODE' if runs out of memory.  */

#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    /* Can't be int, since there is not a shred of a guarantee that int	\
       is wide enough to hold a value of something to which pointer can	\
       be assigned */							\
    active_reg_t this_reg;						\
    									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
1651 
/* Stack slots taken by each saved register: its start, its end and its
   info word.  */
#define NUM_REG_ITEMS  3

/* Slots taken by a failure point apart from the registers: the lowest
   and highest active register, the pattern place and the string place,
   plus the failure point id when debugging.  */
#ifndef DEBUG
# define NUM_NONREG_ITEMS 4
#else
# define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#endif

/* Nominal upper bound on the items pushed per failure point.  It used
   to be based on (num_regs - 1), the number of registers the regexp
   saves, but was fixed at 5 registers to avoid stack overflow for a
   regexp with lots of parens.  */
#define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + 5 * NUM_REG_ITEMS)

/* Items actually pushed for the currently active register range.  */
#define NUM_FAILURE_ITEMS						\
  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS	\
   + NUM_NONREG_ITEMS)

/* Free slots left on the stack before it has to grow.  */
#define REMAINING_AVAIL_SLOTS (fail_stack.size - fail_stack.avail)
1678 
1679 
/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order: the
   failure id (DEBUG only), the string place, the pattern place, the
   highest and lowest active register, and then the saved registers
   from HIGH_REG down to LOW_REG.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.  Left untouched when the saved
	    value is NULL.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also clears `set_regs_matched_done'.  The expansion is a bare brace
   block rather than a `do ... while (0)', and the `else' branch after
   `if (1)' is never executed.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.  */
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const US_CHAR_TYPE *string_temp;					\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_TYPE *) string_temp;				\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (US_CHAR_TYPE *) POP_FAILURE_POINTER ();			\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 ("      info: %p\n",				\
		      reg_info[this_reg].word.pointer);			\
									\
	regend[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
	regstart[this_reg] = (const CHAR_TYPE *) POP_FAILURE_POINTER ();\
	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1760 
1761 
1762 /* Structure for per-register (a.k.a. per-group) information.
1763    Other register information, such as the
1764    starting and ending positions (which are addresses), and the list of
1765    inner groups (which is a bits list) are maintained in separate
1766    variables.
1767 
1768    We are making a (strictly speaking) nonportable assumption here: that
1769    the compiler will pack our bit fields into something that fits into
1770    the type of `word', i.e., is something that fits into one item on the
1771    failure stack.  */
1772 
1773 
1774 /* Declarations and macros for re_match_2.  */
1775 
1776 typedef union
1777 {
1778   fail_stack_elt_t word;
1779   struct
1780   {
1781       /* This field is one if this group can match the empty string,
1782          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1783 #define MATCH_NULL_UNSET_VALUE 3
1784     unsigned match_null_string_p : 2;
1785     unsigned is_active : 1;
1786     unsigned matched_something : 1;
1787     unsigned ever_matched_something : 1;
1788   } bits;
1789 } register_info_type;
1790 
1791 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1792 #define IS_ACTIVE(R)  ((R).bits.is_active)
1793 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1794 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1795 
1796 
/* Call this after a real character has been matched.  Unless it has
   already been done (as tracked by `set_regs_matched_done'), it raises
   both the `matched_something' and the `ever_matched_something' flag
   of every register from lowest_active_reg through highest_active_reg,
   i.e. of the subexpressions we are currently inside.  */
#define SET_REGS_MATCHED()						\
  do									\
    {									\
      if (!set_regs_matched_done)					\
	{								\
	  active_reg_t reg_no = lowest_active_reg;			\
									\
	  set_regs_matched_done = 1;					\
	  while (reg_no <= highest_active_reg)				\
	    {								\
	      EVER_MATCHED_SOMETHING (reg_info[reg_no]) = 1;		\
	      MATCHED_SOMETHING (reg_info[reg_no]) = 1;			\
	      reg_no++;							\
	    }								\
	}								\
    }									\
  while (0)
1816 
1817 /* Registers are set to a sentinel when they haven't yet matched.  */
1818 static CHAR_TYPE reg_unset_dummy;
1819 #define REG_UNSET_VALUE (&reg_unset_dummy)
1820 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1821 
/* Subroutine declarations and macros for regex_compile.

   These are forward declarations of file-local (static) helpers, so
   their definitions have to appear elsewhere in this translation unit.
   They go through _RE_ARGS rather than spelling the parameter lists
   out directly.  */

static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
					      reg_syntax_t syntax,
					      struct re_pattern_buffer *bufp));
static void store_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc, int arg));
static void store_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
				 int arg1, int arg2));
static void insert_op1 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
				  int arg, US_CHAR_TYPE *end));
static void insert_op2 _RE_ARGS ((re_opcode_t op, US_CHAR_TYPE *loc,
				  int arg1, int arg2, US_CHAR_TYPE *end));
static boolean at_begline_loc_p _RE_ARGS ((const CHAR_TYPE *pattern,
					   const CHAR_TYPE *p,
					   reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const CHAR_TYPE *p,
					   const CHAR_TYPE *pend,
					   reg_syntax_t syntax));
/* compile_range has two signatures: with MBS_SUPPORT the range start
   is a CHAR_TYPE and an extra CHAR_SET argument is passed; without it
   the range start is an unsigned int.  insert_space is declared only
   for the MBS_SUPPORT build.  */
#ifdef MBS_SUPPORT
static reg_errcode_t compile_range _RE_ARGS ((CHAR_TYPE range_start,
					      const CHAR_TYPE **p_ptr,
					      const CHAR_TYPE *pend,
					      char *translate,
					      reg_syntax_t syntax,
					      US_CHAR_TYPE *b,
					      CHAR_TYPE *char_set));
static void insert_space _RE_ARGS ((int num, CHAR_TYPE *loc, CHAR_TYPE *end));
#else
static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
					      const CHAR_TYPE **p_ptr,
					      const CHAR_TYPE *pend,
					      char *translate,
					      reg_syntax_t syntax,
					      US_CHAR_TYPE *b));
#endif /* MBS_SUPPORT */
1857 
1858 /* Fetch the next character in the uncompiled pattern---translating it
1859    if necessary.  Also cast from a signed character in the constant
1860    string passed to us by the user to an unsigned char that we can use
1861    as an array index (in, e.g., `translate').

   The macro reads through `p', compares it with `pend' and consults
   `translate'; all three must be in scope.  If the pattern is already
   exhausted (p == pend) it executes `return REG_EEND;' in the enclosing
   function, so it may only be used where that return type and that
   early exit are acceptable.  An earlier definition of PATFETCH, if
   any, takes precedence (#ifndef).  */
1862 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
1863    because it is impossible to allocate 4GB array for some encodings
1864    which have 4 byte character_set like UCS4.  */
1865 #ifndef PATFETCH
1866 # ifdef MBS_SUPPORT
1867 #  define PATFETCH(c)							\
1868   do {if (p == pend) return REG_EEND;					\
1869     c = (US_CHAR_TYPE) *p++;						\
1870     if (translate && (c <= 0xff)) c = (US_CHAR_TYPE) translate[c];	\
1871   } while (0)
1872 # else
1873 #  define PATFETCH(c)							\
1874   do {if (p == pend) return REG_EEND;					\
1875     c = (unsigned char) *p++;						\
1876     if (translate) c = (unsigned char) translate[c];			\
1877   } while (0)
1878 # endif /* MBS_SUPPORT */
1879 #endif
1880 
1881 /* Fetch the next character in the uncompiled pattern, with no
1882    translation.  Like PATFETCH, this returns REG_EEND from the enclosing
   function when p == pend.  */
1883 #define PATFETCH_RAW(c)							\
1884   do {if (p == pend) return REG_EEND;					\
1885     c = (US_CHAR_TYPE) *p++; 						\
1886   } while (0)
1887 
1888 /* Go backwards one character in the pattern.  This only decrements
   `p'; it does not restore the variable a previous fetch stored into.  */
1889 #define PATUNFETCH p--
1890 
1891 
1892 /* If `translate' is non-null, return translate[D], else just D.  We
1893    cast the subscript to translate because some data is declared as
1894    `char *', to avoid warnings when a string constant is passed.  But
1895    when we use a character as a subscript we must make it unsigned.

   D is evaluated more than once, so it must not have side effects.
   The translated value is converted to `char'; the untranslated branch
   yields D with its own type.  An earlier definition of TRANSLATE, if
   any, takes precedence (#ifndef).  */
1896 /* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
1897    because it is impossible to allocate 4GB array for some encodings
1898    which have 4 byte character_set like UCS4.  */
1899 #ifndef TRANSLATE
1900 # ifdef MBS_SUPPORT
1901 #  define TRANSLATE(d) \
1902   ((translate && ((US_CHAR_TYPE) (d)) <= 0xff) \
1903    ? (char) translate[(unsigned char) (d)] : (d))
1904 #else
1905 #  define TRANSLATE(d) \
1906   (translate ? (char) translate[(unsigned char) (d)] : (d))
1907 # endif /* MBS_SUPPORT */
1908 #endif
1909 
1910 
1911 /* Macros for outputting the compiled pattern into `buffer'.  */
1912 
1913 /* If the buffer isn't allocated when it comes in, use this.
   The value is a byte count: room for 32 US_CHAR_TYPE elements.  */
1914 #define INIT_BUF_SIZE  (32 * sizeof(US_CHAR_TYPE))
1915 
1916 /* Make sure we have at least N more units of space in buffer.
   With MBS_SUPPORT, N counts CHAR_TYPE elements: it is multiplied by
   sizeof(CHAR_TYPE) and added to the byte distance between `b' and
   COMPILED_BUFFER_VAR before the comparison with bufp->allocated.
   Otherwise N is added directly to b - bufp->buffer.
   The expansion is a bare `while' statement that calls EXTEND_BUFFER
   until the condition holds; because EXTEND_BUFFER can return from the
   enclosing function, so can this macro.  */
1917 #ifdef MBS_SUPPORT
1918 # define GET_BUFFER_SPACE(n)						\
1919     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
1920             + (n)*sizeof(CHAR_TYPE)) > bufp->allocated)			\
1921       EXTEND_BUFFER ()
1922 #else
1923 # define GET_BUFFER_SPACE(n)						\
1924     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1925       EXTEND_BUFFER ()
1926 #endif /* MBS_SUPPORT */
1927 
1928 /* Make sure we have one more unit of buffer space and then add C to it.
   C is converted to US_CHAR_TYPE and stored at `b', which is then
   advanced.  Space is reserved through GET_BUFFER_SPACE first, so `b'
   may have been relocated by the time the store happens.  */
1929 #define BUF_PUSH(c)							\
1930   do {									\
1931     GET_BUFFER_SPACE (1);						\
1932     *b++ = (US_CHAR_TYPE) (c);						\
1933   } while (0)
1934 
1935 
1936 /* Ensure we have two more units of buffer space and then append C1 and C2.  */
1937 #define BUF_PUSH_2(c1, c2)						\
1938   do {									\
1939     GET_BUFFER_SPACE (2);						\
1940     *b++ = (US_CHAR_TYPE) (c1);					\
1941     *b++ = (US_CHAR_TYPE) (c2);					\
1942   } while (0)
1943 
1944 
1945 /* As with BUF_PUSH_2, except for three units.  */
1946 #define BUF_PUSH_3(c1, c2, c3)						\
1947   do {									\
1948     GET_BUFFER_SPACE (3);						\
1949     *b++ = (US_CHAR_TYPE) (c1);					\
1950     *b++ = (US_CHAR_TYPE) (c2);					\
1951     *b++ = (US_CHAR_TYPE) (c3);					\
1952   } while (0)
1953 
1954 /* Store a jump with opcode OP at LOC to location TO.  We store a
1955    relative address offset by the 1 + OFFSET_ADDRESS_SIZE units the jump
   itself occupies, i.e. the argument passed to store_op1 is
   TO - LOC - (1 + OFFSET_ADDRESS_SIZE), converted to int.
   NOTE(review): older wording said "three bytes", which presumably
   corresponds to OFFSET_ADDRESS_SIZE == 2 in the non-MBS build; confirm
   against the definition of OFFSET_ADDRESS_SIZE.  */
1956 #define STORE_JUMP(op, loc, to) \
1957   store_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
1958 
1959 /* Likewise, for a two-argument jump.  ARG is passed through unchanged
   as the second argument of store_op2.  */
1960 #define STORE_JUMP2(op, loc, to, arg) \
1961   store_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
1962 
1963 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1964 #define INSERT_JUMP(op, loc, to) \
1965   insert_op1 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
1966 
1967 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1968 #define INSERT_JUMP2(op, loc, to, arg) \
1969   insert_op2 (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
1970 	      arg, b)
1971 
1972 
1973 /* This is not an arbitrary limit: the arguments which represent offsets
1974    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1975    be too small, many things would have to change.  */
1976 /* Any other compiler which, like MSC, has an allocation limit below 2^16
1977    bytes will have to use an approach similar to what was done below for
1978    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1979    reallocating to 0 bytes.  Such a thing is not going to work too well.
1980    You have been warned!!  */
1981 #if defined _MSC_VER  && !defined WIN32
1982 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1983    The REALLOC define eliminates a flurry of conversion warnings,
1984    but is not required. */
1985 # define MAX_BUF_SIZE  65500L
1986 # define REALLOC(p,s) realloc ((p), (size_t) (s))
1987 #else
/* Everywhere else the limit is exactly 2^16 and REALLOC is a plain
   realloc call.  */
1988 # define MAX_BUF_SIZE (1L << 16)
1989 # define REALLOC(p,s) realloc ((p), (s))
1990 #endif
1991 
1992 /* Double the buffer's allocated size via realloc, capping it at
1993    MAX_BUF_SIZE, and reset the pointers that pointed into the old block
1994    to point to the correct places in the new one.  If the buffer cannot
1995    grow any further, return REG_ESIZE from the enclosing function.

   Details, as implemented below:
     - non-MBS build: REG_ESIZE is returned when bufp->allocated already
       equals MAX_BUF_SIZE;
     - MBS_SUPPORT build: REG_ESIZE is returned when bufp->allocated plus
       one US_CHAR_TYPE would exceed MAX_BUF_SIZE, and the new size is
       rounded down to a whole number of US_CHAR_TYPE elements (at least
       one);
     - if the reallocation yields NULL, REG_ESPACE is returned;
     - if the block moved, `b' and `begalt' are always shifted by the
       distance moved, and `fixup_alt_jump', `laststart' and
       `pending_exact' are shifted when they are non-null.
   All of those names, plus `bufp' and COMPILED_BUFFER_VAR, must be in
   scope at the point of use.

   NOTE(review): the non-MBS variant stores the REALLOC result straight
   into bufp->buffer, so after a failed reallocation the previous pointer
   value is no longer held there -- confirm that no caller relies on it.  */
1996 #if __BOUNDED_POINTERS__
/* With bounded pointers each relocated pointer also needs its bounds
   adjusted; when the block did not move, only the high bounds are
   refreshed (ELSE_EXTEND_BUFFER_HIGH_BOUND).  */
1997 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1998 # define MOVE_BUFFER_POINTER(P) \
1999   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2000 # define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
2001   else						\
2002     {						\
2003       SET_HIGH_BOUND (b);			\
2004       SET_HIGH_BOUND (begalt);			\
2005       if (fixup_alt_jump)			\
2006 	SET_HIGH_BOUND (fixup_alt_jump);	\
2007       if (laststart)				\
2008 	SET_HIGH_BOUND (laststart);		\
2009       if (pending_exact)			\
2010 	SET_HIGH_BOUND (pending_exact);		\
2011     }
2012 #else
/* Without bounded pointers relocation is a plain add of `incr', and
   there is nothing to do when the block stayed in place.  */
2013 # define MOVE_BUFFER_POINTER(P) (P) += incr
2014 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
2015 #endif
2016 
2017 #ifdef MBS_SUPPORT
2018 # define EXTEND_BUFFER()						\
2019   do {									\
2020     US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR;			\
2021     int wchar_count;							\
2022     if (bufp->allocated + sizeof(US_CHAR_TYPE) > MAX_BUF_SIZE)		\
2023       return REG_ESIZE;							\
2024     bufp->allocated <<= 1;						\
2025     if (bufp->allocated > MAX_BUF_SIZE)					\
2026       bufp->allocated = MAX_BUF_SIZE;					\
2027     /* How many characters the new buffer can have?  */			\
2028     wchar_count = bufp->allocated / sizeof(US_CHAR_TYPE);		\
2029     if (wchar_count == 0) wchar_count = 1;				\
2030     /* Truncate the buffer to CHAR_TYPE align.  */			\
2031     bufp->allocated = wchar_count * sizeof(US_CHAR_TYPE);		\
2032     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, US_CHAR_TYPE);		\
2033     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
2034     if (COMPILED_BUFFER_VAR == NULL)					\
2035       return REG_ESPACE;						\
2036     /* If the buffer moved, move all the pointers into it.  */		\
2037     if (old_buffer != COMPILED_BUFFER_VAR)				\
2038       {									\
2039 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
2040 	MOVE_BUFFER_POINTER (b);					\
2041 	MOVE_BUFFER_POINTER (begalt);					\
2042 	if (fixup_alt_jump)						\
2043 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
2044 	if (laststart)							\
2045 	  MOVE_BUFFER_POINTER (laststart);				\
2046 	if (pending_exact)						\
2047 	  MOVE_BUFFER_POINTER (pending_exact);				\
2048       }									\
2049     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
2050   } while (0)
2051 #else
2052 # define EXTEND_BUFFER()						\
2053   do {									\
2054     US_CHAR_TYPE *old_buffer = COMPILED_BUFFER_VAR;			\
2055     if (bufp->allocated == MAX_BUF_SIZE)				\
2056       return REG_ESIZE;							\
2057     bufp->allocated <<= 1;						\
2058     if (bufp->allocated > MAX_BUF_SIZE)					\
2059       bufp->allocated = MAX_BUF_SIZE;					\
2060     bufp->buffer = (US_CHAR_TYPE *) REALLOC (COMPILED_BUFFER_VAR,	\
2061 						bufp->allocated);	\
2062     if (COMPILED_BUFFER_VAR == NULL)					\
2063       return REG_ESPACE;						\
2064     /* If the buffer moved, move all the pointers into it.  */		\
2065     if (old_buffer != COMPILED_BUFFER_VAR)				\
2066       {									\
2067 	int incr = COMPILED_BUFFER_VAR - old_buffer;			\
2068 	MOVE_BUFFER_POINTER (b);					\
2069 	MOVE_BUFFER_POINTER (begalt);					\
2070 	if (fixup_alt_jump)						\
2071 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
2072 	if (laststart)							\
2073 	  MOVE_BUFFER_POINTER (laststart);				\
2074 	if (pending_exact)						\
2075 	  MOVE_BUFFER_POINTER (pending_exact);				\
2076       }									\
2077     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
2078   } while (0)
2079 #endif /* MBS_SUPPORT */
2080 
2081 /* Since we have one byte reserved for the register number argument to
2082    {start,stop}_memory, the maximum number of groups we can report
2083    things about is what fits in that byte.  */
2084 #define MAX_REGNUM 255
2085 
2086 /* But patterns can have more than `MAX_REGNUM' registers.  We just
2087    ignore the excess.  Hence the counter type is a full `unsigned'
   rather than a single byte.  */
2088 typedef unsigned regnum_t;
2089 
2090 
2091 /* Macros for the compile stack.  */
2092 
2093 /* Since offsets can go either forwards or backwards, this type needs to
2094    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
2095 /* int may be not enough when sizeof(int) == 2.  */
2096 typedef long pattern_offset_t;
2097 
/* One entry of the compile stack.  The first four members are stored as
   pattern_offset_t values rather than pointers -- presumably so they
   stay valid when the compiled buffer is reallocated; confirm against
   the push/pop code in regex_compile.  `regnum' is the register number
   saved with the entry.  */
2098 typedef struct
2099 {
2100   pattern_offset_t begalt_offset;
2101   pattern_offset_t fixup_alt_jump;
2102   pattern_offset_t inner_group_offset;
2103   pattern_offset_t laststart_offset;
2104   regnum_t regnum;
2105 } compile_stack_elt_t;
2106 
2107 
/* The stack itself: a pointer to its element array, the number of
   elements allocated (`size') and the index of the next free slot
   (`avail').  */
2108 typedef struct
2109 {
2110   compile_stack_elt_t *stack;
2111   unsigned size;
2112   unsigned avail;			/* Offset of next open position.  */
2113 } compile_stack_type;
2114 
2115 
/* Number of elements the stack starts out with.  */
2116 #define INIT_COMPILE_STACK_SIZE 32
2117 
/* The following macros operate on a variable named `compile_stack'
   that must be in scope where they are used.  */
2118 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
2119 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
2120 
2121 /* The next available element.  */
2122 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2123 
2124 
/* Set the bit for character C in a list.

   The bitmap is addressed through `b', which must be in scope at the
   point of use.  C is converted to unsigned char; the result selects
   the byte (value / BYTEWIDTH) and the bit inside it
   (value % BYTEWIDTH).

   C is parenthesized at both uses so that an expression argument is
   converted as a whole; previously the cast in the shift count bound
   only to the first operand of such an expression.  C is still
   evaluated twice, so it must not have side effects.  */
#define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
2129 
2130 
2131 /* Get the next unsigned number in the uncompiled pattern.

   Characters are read with PATFETCH into `c' (so `c', `p', `pend' and
   `translate' must be in scope) until the pattern ends or a non-digit
   is fetched; that non-digit has already been consumed and is left in
   `c'.  Each digit is accumulated into NUM in decimal.  A negative NUM
   is reset to 0 before the first digit is added, so NUM stays negative
   only if no digit was seen.  Once NUM exceeds RE_DUP_MAX further
   digits are still consumed but no longer accumulated.

   The expansion is a brace block, not a do/while, and the `break'
   inside it leaves only the macro's own loop.  */
2132 #define GET_UNSIGNED_NUMBER(num) 					\
2133   {									\
2134     while (p != pend)							\
2135       {									\
2136 	PATFETCH (c);							\
2137 	if (! ('0' <= c && c <= '9'))					\
2138 	  break;							\
2139 	if (num <= RE_DUP_MAX)						\
2140 	  {								\
2141 	    if (num < 0)						\
2142 	      num = 0;							\
2143 	    num = num * 10 + c - '0';					\
2144 	  }								\
2145       }									\
2146   }
2147 
/* CHAR_CLASS_MAX_LENGTH is the longest character-class name accepted;
   IS_CHAR_CLASS (string) is nonzero if STRING names a class.  */
2148 #if defined _LIBC || WIDE_CHAR_SUPPORT
2149 /* The GNU C library provides support for user-defined character classes
2150    and the functions from ISO C amendment 1.  */
2151 # ifdef CHARCLASS_NAME_MAX
2152 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2153 # else
2154 /* This shouldn't happen but some implementations might still have this
2155    problem.  Use a reasonable default value.  */
2156 #  define CHAR_CLASS_MAX_LENGTH 256
2157 # endif
2158 
/* Class names are looked up with wctype (__wctype inside libc).  */
2159 # ifdef _LIBC
2160 #  define IS_CHAR_CLASS(string) __wctype (string)
2161 # else
2162 #  define IS_CHAR_CLASS(string) wctype (string)
2163 # endif
2164 #else
/* Without wide-character support only the twelve names listed below
   are recognized, compared with STREQ.  */
2165 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
2166 
2167 # define IS_CHAR_CLASS(string)						\
2168    (STREQ (string, "alpha") || STREQ (string, "upper")			\
2169     || STREQ (string, "lower") || STREQ (string, "digit")		\
2170     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
2171     || STREQ (string, "space") || STREQ (string, "print")		\
2172     || STREQ (string, "punct") || STREQ (string, "graph")		\
2173     || STREQ (string, "cntrl") || STREQ (string, "blank"))
2174 #endif
2175 
2176 #ifndef MATCH_MAY_ALLOCATE
2177 
2178 /* If we cannot allocate large objects within re_match_2_internal,
2179    we make the fail stack and register vectors global.
2180    The fail stack, we grow to the maximum size when a regexp
2181    is compiled.
2182    The register vectors, we adjust in size each time we
2183    compile a regexp, according to the number of registers it needs.
   Everything in this section is compiled only when MATCH_MAY_ALLOCATE
   is not defined.  */
2184 
2185 static fail_stack_type fail_stack;
2186 
2187 /* Size with which the following vectors are currently allocated.
2188    That is so we can make them bigger as needed,
2189    but never make them smaller.  */
2190 static int regs_allocated_size;
2191 
/* The register vectors themselves; regex_grow_registers resizes all
   nine of them together.  */
2192 static const char **     regstart, **     regend;
2193 static const char ** old_regstart, ** old_regend;
2194 static const char **best_regstart, **best_regend;
2195 static register_info_type *reg_info;
2196 static const char **reg_dummy;
2197 static register_info_type *reg_info_dummy;
2198 
2199 /* Make the register vectors big enough for NUM_REGS registers,
2200    but don't make them smaller.  */
2201 
2202 static
2203 regex_grow_registers (num_regs)
2204      int num_regs;
2205 {
2206   if (num_regs > regs_allocated_size)
2207     {
2208       RETALLOC_IF (regstart,	 num_regs, const char *);
2209       RETALLOC_IF (regend,	 num_regs, const char *);
2210       RETALLOC_IF (old_regstart, num_regs, const char *);
2211       RETALLOC_IF (old_regend,	 num_regs, const char *);
2212       RETALLOC_IF (best_regstart, num_regs, const char *);
2213       RETALLOC_IF (best_regend,	 num_regs, const char *);
2214       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
2215       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2216       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
2217 
2218       regs_allocated_size = num_regs;
2219     }
2220 }
2221 
2222 #endif /* not MATCH_MAY_ALLOCATE */
2223 
/* Declared here, ahead of regex_compile; note that the compile stack is
   passed by value, not by pointer.  */
2224 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2225 						 compile_stack,
2226 						 regnum_t regnum));
2227 
2228 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2229    Returns one of error codes defined in `regex.h', or zero for success.
2230 
2231    Assumes the `allocated' (and perhaps `buffer') and `translate'
2232    fields are set in BUFP on entry.
2233 
2234    If it succeeds, results are put in BUFP (if it returns an error, the
2235    contents of BUFP are undefined):
2236      `buffer' is the compiled pattern;
2237      `syntax' is set to SYNTAX;
2238      `used' is set to the length of the compiled pattern;
2239      `fastmap_accurate' is zero;
2240      `re_nsub' is the number of subexpressions in PATTERN;
2241      `not_bol' and `not_eol' are zero;
2242 
2243    The `fastmap' and `newline_anchor' fields are neither
2244    examined nor set.  */
2245 
2246 /* Return, freeing storage we allocated.
   The expansion is a single `return' whose operand is a comma
   expression: the free calls are evaluated first, then VALUE becomes
   the function result.  compile_stack.stack is always released; with
   MBS_SUPPORT the temporary `pattern', `mbs_offset' and `is_binary'
   arrays are released as well.  All of these names must be in scope
   where the macro is used.  */
2247 #ifdef MBS_SUPPORT
2248 # define FREE_STACK_RETURN(value)		\
2249   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2250 #else
2251 # define FREE_STACK_RETURN(value)		\
2252   return (free (compile_stack.stack), value)
2253 #endif /* MBS_SUPPORT */
2254 
2255 static reg_errcode_t
2256 #ifdef MBS_SUPPORT
2257 regex_compile (cpattern, csize, syntax, bufp)
2258      const char *cpattern;
2259      size_t csize;
2260 #else
2261 regex_compile (pattern, size, syntax, bufp)
2262      const char *pattern;
2263      size_t size;
2264 #endif /* MBS_SUPPORT */
2265      reg_syntax_t syntax;
2266      struct re_pattern_buffer *bufp;
2267 {
2268   /* We fetch characters from PATTERN here.  Even though PATTERN is
2269      `char *' (i.e., signed), we declare these variables as unsigned, so
2270      they can be reliably used as array indices.  */
2271   register US_CHAR_TYPE c, c1;
2272 
2273 #ifdef MBS_SUPPORT
2274   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2275   CHAR_TYPE *pattern, *COMPILED_BUFFER_VAR;
2276   size_t size;
2277   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
2278   int *mbs_offset = NULL;
2279   /* It hold whether each wchar_t is binary data or not.  */
2280   char *is_binary = NULL;
2281   /* A flag whether exactn is handling binary data or not.  */
2282   char is_exactn_bin = FALSE;
2283 #endif /* MBS_SUPPORT */
2284 
2285   /* A random temporary spot in PATTERN.  */
2286   const CHAR_TYPE *p1;
2287 
2288   /* Points to the end of the buffer, where we should append.  */
2289   register US_CHAR_TYPE *b;
2290 
2291   /* Keeps track of unclosed groups.  */
2292   compile_stack_type compile_stack;
2293 
2294   /* Points to the current (ending) position in the pattern.  */
2295 #ifdef MBS_SUPPORT
2296   const CHAR_TYPE *p;
2297   const CHAR_TYPE *pend;
2298 #else
2299   const CHAR_TYPE *p = pattern;
2300   const CHAR_TYPE *pend = pattern + size;
2301 #endif /* MBS_SUPPORT */
2302 
2303   /* How to translate the characters in the pattern.  */
2304   RE_TRANSLATE_TYPE translate = bufp->translate;
2305 
2306   /* Address of the count-byte of the most recently inserted `exactn'
2307      command.  This makes it possible to tell if a new exact-match
2308      character can be added to that command or if the character requires
2309      a new `exactn' command.  */
2310   US_CHAR_TYPE *pending_exact = 0;
2311 
2312   /* Address of start of the most recently finished expression.
2313      This tells, e.g., postfix * where to find the start of its
2314      operand.  Reset at the beginning of groups and alternatives.  */
2315   US_CHAR_TYPE *laststart = 0;
2316 
2317   /* Address of beginning of regexp, or inside of last group.  */
2318   US_CHAR_TYPE *begalt;
2319 
2320   /* Address of the place where a forward jump should go to the end of
2321      the containing expression.  Each alternative of an `or' -- except the
2322      last -- ends with a forward jump of this sort.  */
2323   US_CHAR_TYPE *fixup_alt_jump = 0;
2324 
2325   /* Counts open-groups as they are encountered.  Remembered for the
2326      matching close-group on the compile stack, so the same register
2327      number is put in the stop_memory as the start_memory.  */
2328   regnum_t regnum = 0;
2329 
2330 #ifdef MBS_SUPPORT
2331   /* Initialize the wchar_t PATTERN and offset_buffer.  */
2332   p = pend = pattern = TALLOC(csize + 1, CHAR_TYPE);
2333   p[csize] = L'\0';	/* sentinel */
2334   mbs_offset = TALLOC(csize + 1, int);
2335   is_binary = TALLOC(csize + 1, char);
2336   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2337     {
2338       if (pattern) free(pattern);
2339       if (mbs_offset) free(mbs_offset);
2340       if (is_binary) free(is_binary);
2341       return REG_ESPACE;
2342     }
2343   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2344   pend = p + size;
2345   if (size < 0)
2346     {
2347       if (pattern) free(pattern);
2348       if (mbs_offset) free(mbs_offset);
2349       if (is_binary) free(is_binary);
2350       return REG_BADPAT;
2351     }
2352 #endif
2353 
2354 #ifdef DEBUG
2355   DEBUG_PRINT1 ("\nCompiling pattern: ");
2356   if (debug)
2357     {
2358       unsigned debug_count;
2359 
2360       for (debug_count = 0; debug_count < size; debug_count++)
2361         PUT_CHAR (pattern[debug_count]);
2362       putchar ('\n');
2363     }
2364 #endif /* DEBUG */
2365 
2366   /* Initialize the compile stack.  */
2367   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2368   if (compile_stack.stack == NULL)
2369     {
2370 #ifdef MBS_SUPPORT
2371       if (pattern) free(pattern);
2372       if (mbs_offset) free(mbs_offset);
2373       if (is_binary) free(is_binary);
2374 #endif
2375       return REG_ESPACE;
2376     }
2377 
2378   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2379   compile_stack.avail = 0;
2380 
2381   /* Initialize the pattern buffer.  */
2382   bufp->syntax = syntax;
2383   bufp->fastmap_accurate = 0;
2384   bufp->not_bol = bufp->not_eol = 0;
2385 
2386   /* Set `used' to zero, so that if we return an error, the pattern
2387      printer (for debugging) will think there's no pattern.  We reset it
2388      at the end.  */
2389   bufp->used = 0;
2390 
2391   /* Always count groups, whether or not bufp->no_sub is set.  */
2392   bufp->re_nsub = 0;
2393 
2394 #if !defined emacs && !defined SYNTAX_TABLE
2395   /* Initialize the syntax table.  */
2396    init_syntax_once ();
2397 #endif
2398 
2399   if (bufp->allocated == 0)
2400     {
2401       if (bufp->buffer)
2402 	{ /* If zero allocated, but buffer is non-null, try to realloc
2403              enough space.  This loses if buffer's address is bogus, but
2404              that is the user's responsibility.  */
2405 #ifdef MBS_SUPPORT
2406 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2407 	     buffer.  */
2408           free(bufp->buffer);
2409           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(US_CHAR_TYPE),
2410 					US_CHAR_TYPE);
2411 #else
2412           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, US_CHAR_TYPE);
2413 #endif /* MBS_SUPPORT */
2414         }
2415       else
2416         { /* Caller did not allocate a buffer.  Do it for them.  */
2417           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(US_CHAR_TYPE),
2418 					US_CHAR_TYPE);
2419         }
2420 
2421       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2422 #ifdef MBS_SUPPORT
2423       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2424 #endif /* MBS_SUPPORT */
2425       bufp->allocated = INIT_BUF_SIZE;
2426     }
2427 #ifdef MBS_SUPPORT
2428   else
2429     COMPILED_BUFFER_VAR = (US_CHAR_TYPE*) bufp->buffer;
2430 #endif
2431 
2432   begalt = b = COMPILED_BUFFER_VAR;
2433 
2434   /* Loop through the uncompiled pattern until we're at the end.  */
2435   while (p != pend)
2436     {
2437       PATFETCH (c);
2438 
2439       switch (c)
2440         {
2441         case '^':
2442           {
2443             if (   /* If at start of pattern, it's an operator.  */
2444                    p == pattern + 1
2445                    /* If context independent, it's an operator.  */
2446                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2447                    /* Otherwise, depends on what's come before.  */
2448                 || at_begline_loc_p (pattern, p, syntax))
2449               BUF_PUSH (begline);
2450             else
2451               goto normal_char;
2452           }
2453           break;
2454 
2455 
2456         case '$':
2457           {
2458             if (   /* If at end of pattern, it's an operator.  */
2459                    p == pend
2460                    /* If context independent, it's an operator.  */
2461                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2462                    /* Otherwise, depends on what's next.  */
2463                 || at_endline_loc_p (p, pend, syntax))
2464                BUF_PUSH (endline);
2465              else
2466                goto normal_char;
2467            }
2468            break;
2469 
2470 
2471 	case '+':
2472         case '?':
2473           if ((syntax & RE_BK_PLUS_QM)
2474               || (syntax & RE_LIMITED_OPS))
2475             goto normal_char;
2476         handle_plus:
2477         case '*':
2478           /* If there is no previous pattern... */
2479           if (!laststart)
2480             {
2481               if (syntax & RE_CONTEXT_INVALID_OPS)
2482                 FREE_STACK_RETURN (REG_BADRPT);
2483               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2484                 goto normal_char;
2485             }
2486 
2487           {
2488             /* Are we optimizing this jump?  */
2489             boolean keep_string_p = false;
2490 
2491             /* 1 means zero (many) matches is allowed.  */
2492             char zero_times_ok = 0, many_times_ok = 0;
2493 
2494             /* If there is a sequence of repetition chars, collapse it
2495                down to just one (the right one).  We can't combine
2496                interval operators with these because of, e.g., `a{2}*',
2497                which should only match an even number of `a's.  */
2498 
2499             for (;;)
2500               {
2501                 zero_times_ok |= c != '+';
2502                 many_times_ok |= c != '?';
2503 
2504                 if (p == pend)
2505                   break;
2506 
2507                 PATFETCH (c);
2508 
2509                 if (c == '*'
2510                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2511                   ;
2512 
2513                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2514                   {
2515                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2516 
2517                     PATFETCH (c1);
2518                     if (!(c1 == '+' || c1 == '?'))
2519                       {
2520                         PATUNFETCH;
2521                         PATUNFETCH;
2522                         break;
2523                       }
2524 
2525                     c = c1;
2526                   }
2527                 else
2528                   {
2529                     PATUNFETCH;
2530                     break;
2531                   }
2532 
2533                 /* If we get here, we found another repeat character.  */
2534                }
2535 
2536             /* Star, etc. applied to an empty pattern is equivalent
2537                to an empty pattern.  */
2538             if (!laststart)
2539               break;
2540 
2541             /* Now we know whether or not zero matches is allowed
2542                and also whether or not two or more matches is allowed.  */
2543             if (many_times_ok)
2544               { /* More than one repetition is allowed, so put in at the
2545                    end a backward relative jump from `b' to before the next
2546                    jump we're going to put in below (which jumps from
2547                    laststart to after this jump).
2548 
2549                    But if we are at the `*' in the exact sequence `.*\n',
2550                    insert an unconditional jump backwards to the .,
2551                    instead of the beginning of the loop.  This way we only
2552                    push a failure point once, instead of every time
2553                    through the loop.  */
2554                 assert (p - 1 > pattern);
2555 
2556                 /* Allocate the space for the jump.  */
2557                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2558 
2559                 /* We know we are not at the first character of the pattern,
2560                    because laststart was nonzero.  And we've already
2561                    incremented `p', by the way, to be the character after
2562                    the `*'.  Do we have to do something analogous here
2563                    for null bytes, because of RE_DOT_NOT_NULL?  */
2564                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2565 		    && zero_times_ok
2566                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2567                     && !(syntax & RE_DOT_NEWLINE))
2568                   { /* We have .*\n.  */
2569                     STORE_JUMP (jump, b, laststart);
2570                     keep_string_p = true;
2571                   }
2572                 else
2573                   /* Anything else.  */
2574                   STORE_JUMP (maybe_pop_jump, b, laststart -
2575 			      (1 + OFFSET_ADDRESS_SIZE));
2576 
2577                 /* We've added more stuff to the buffer.  */
2578                 b += 1 + OFFSET_ADDRESS_SIZE;
2579               }
2580 
2581             /* On failure, jump from laststart to b + 3, which will be the
2582                end of the buffer after this jump is inserted.  */
2583 	    /* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2584 	       'b + 3'.  */
2585             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2586             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2587                                        : on_failure_jump,
2588                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2589             pending_exact = 0;
2590             b += 1 + OFFSET_ADDRESS_SIZE;
2591 
2592             if (!zero_times_ok)
2593               {
2594                 /* At least one repetition is required, so insert a
2595                    `dummy_failure_jump' before the initial
2596                    `on_failure_jump' instruction of the loop. This
2597                    effects a skip over that instruction the first time
2598                    we hit that loop.  */
2599                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2600                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2601 			     2 + 2 * OFFSET_ADDRESS_SIZE);
2602                 b += 1 + OFFSET_ADDRESS_SIZE;
2603               }
2604             }
2605 	  break;
2606 
2607 
2608 	case '.':
2609           laststart = b;
2610           BUF_PUSH (anychar);
2611           break;
2612 
2613 
2614         case '[':
2615           {
2616             boolean had_char_class = false;
2617 #ifdef MBS_SUPPORT
2618 	    CHAR_TYPE range_start = 0xffffffff;
2619 #else
2620 	    unsigned int range_start = 0xffffffff;
2621 #endif
2622             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2623 
2624 #ifdef MBS_SUPPORT
2625 	    /* We assume a charset(_not) structure as a wchar_t array.
2626 	       charset[0] = (re_opcode_t) charset(_not)
2627                charset[1] = l (= length of char_classes)
2628                charset[2] = m (= length of collating_symbols)
2629                charset[3] = n (= length of equivalence_classes)
2630 	       charset[4] = o (= length of char_ranges)
2631 	       charset[5] = p (= length of chars)
2632 
2633                charset[6] = char_class (wctype_t)
2634                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2635                          ...
2636                charset[l+5]  = char_class (wctype_t)
2637 
2638                charset[l+6]  = collating_symbol (wchar_t)
2639                             ...
2640                charset[l+m+5]  = collating_symbol (wchar_t)
2641 					ifdef _LIBC we use the index in
2642 					_NL_COLLATE_SYMB_EXTRAMB instead of
2643 					wchar_t string.
2644 
2645                charset[l+m+6]  = equivalence_classes (wchar_t)
2646                               ...
2647                charset[l+m+n+5]  = equivalence_classes (wchar_t)
2648 					ifdef _LIBC we use the index in
2649 					_NL_COLLATE_WEIGHT instead of
2650 					wchar_t string.
2651 
2652 	       charset[l+m+n+6] = range_start
2653 	       charset[l+m+n+7] = range_end
2654 	                       ...
2655 	       charset[l+m+n+2o+4] = range_start
2656 	       charset[l+m+n+2o+5] = range_end
2657 					ifdef _LIBC we use the value looked up
2658 					in _NL_COLLATE_COLLSEQ instead of
2659 					wchar_t character.
2660 
2661 	       charset[l+m+n+2o+6] = char
2662 	                          ...
2663 	       charset[l+m+n+2o+p+5] = char
2664 
2665 	     */
2666 
2667 	    /* We need at least 6 spaces: the opcode, the length of
2668                char_classes, the length of collating_symbols, the length of
2669                equivalence_classes, the length of char_ranges, the length of
2670                chars.  */
2671 	    GET_BUFFER_SPACE (6);
2672 
2673 	    /* Save b as laststart, and use laststart as the pointer
2674 	       to the first element of the charset here.
2675 	       In other words, laststart[i] indicates charset[i].  */
2676             laststart = b;
2677 
2678             /* We test `*p == '^' twice, instead of using an if
2679                statement, so we only need one BUF_PUSH.  */
2680             BUF_PUSH (*p == '^' ? charset_not : charset);
2681             if (*p == '^')
2682               p++;
2683 
2684             /* Push the length of char_classes, the length of
2685                collating_symbols, the length of equivalence_classes, the
2686                length of char_ranges and the length of chars.  */
2687             BUF_PUSH_3 (0, 0, 0);
2688             BUF_PUSH_2 (0, 0);
2689 
2690             /* Remember the first position in the bracket expression.  */
2691             p1 = p;
2692 
2693             /* charset_not matches newline according to a syntax bit.  */
2694             if ((re_opcode_t) b[-6] == charset_not
2695                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2696 	      {
2697 		BUF_PUSH('\n');
2698 		laststart[5]++; /* Update the length of characters  */
2699 	      }
2700 
2701             /* Read in characters and ranges, setting map bits.  */
2702             for (;;)
2703               {
2704                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2705 
2706                 PATFETCH (c);
2707 
2708                 /* \ might escape characters inside [...] and [^...].  */
2709                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2710                   {
2711                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2712 
2713                     PATFETCH (c1);
2714 		    BUF_PUSH(c1);
2715 		    laststart[5]++; /* Update the length of chars  */
2716 		    range_start = c1;
2717                     continue;
2718                   }
2719 
2720                 /* Could be the end of the bracket expression.  If it's
2721                    not (i.e., when the bracket expression is `[]' so
2722                    far), the ']' character bit gets set way below.  */
2723                 if (c == ']' && p != p1 + 1)
2724                   break;
2725 
2726                 /* Look ahead to see if it's a range when the last thing
2727                    was a character class.  */
2728                 if (had_char_class && c == '-' && *p != ']')
2729                   FREE_STACK_RETURN (REG_ERANGE);
2730 
2731                 /* Look ahead to see if it's a range when the last thing
2732                    was a character: if this is a hyphen not at the
2733                    beginning or the end of a list, then it's the range
2734                    operator.  */
2735                 if (c == '-'
2736                     && !(p - 2 >= pattern && p[-2] == '[')
2737                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2738                     && *p != ']')
2739                   {
2740                     reg_errcode_t ret;
2741 		    /* Allocate the space for range_start and range_end.  */
2742 		    GET_BUFFER_SPACE (2);
2743 		    /* Update the pointer to indicate end of buffer.  */
2744                     b += 2;
2745                     ret = compile_range (range_start, &p, pend, translate,
2746                                          syntax, b, laststart);
2747                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2748                     range_start = 0xffffffff;
2749                   }
2750                 else if (p[0] == '-' && p[1] != ']')
2751                   { /* This handles ranges made up of characters only.  */
2752                     reg_errcode_t ret;
2753 
2754 		    /* Move past the `-'.  */
2755                     PATFETCH (c1);
2756 		    /* Allocate the space for range_start and range_end.  */
2757 		    GET_BUFFER_SPACE (2);
2758 		    /* Update the pointer to indicate end of buffer.  */
2759                     b += 2;
2760                     ret = compile_range (c, &p, pend, translate, syntax, b,
2761                                          laststart);
2762                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2763 		    range_start = 0xffffffff;
2764                   }
2765 
2766                 /* See if we're at the beginning of a possible character
2767                    class.  */
2768                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2769                   { /* Leave room for the null.  */
2770                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2771 
2772                     PATFETCH (c);
2773                     c1 = 0;
2774 
2775                     /* If pattern is `[[:'.  */
2776                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2777 
2778                     for (;;)
2779                       {
2780                         PATFETCH (c);
2781                         if ((c == ':' && *p == ']') || p == pend)
2782                           break;
2783 			if (c1 < CHAR_CLASS_MAX_LENGTH)
2784 			  str[c1++] = c;
2785 			else
2786 			  /* This is in any case an invalid class name.  */
2787 			  str[0] = '\0';
2788                       }
2789                     str[c1] = '\0';
2790 
2791                     /* If it isn't a word bracketed by `[:' and `:]':
2792                        undo the ending character, the letters, and leave
2793                        the leading `:' and `[' (but store them as character).  */
2794                     if (c == ':' && *p == ']')
2795                       {
2796 			wctype_t wt;
2797 			uintptr_t alignedp;
2798 
2799 			/* Query the character class as wctype_t.  */
2800 			wt = IS_CHAR_CLASS (str);
2801 			if (wt == 0)
2802 			  FREE_STACK_RETURN (REG_ECTYPE);
2803 
2804                         /* Throw away the ] at the end of the character
2805                            class.  */
2806                         PATFETCH (c);
2807 
2808                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2809 
2810 			/* Allocate the space for character class.  */
2811                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2812 			/* Update the pointer to indicate end of buffer.  */
2813                         b += CHAR_CLASS_SIZE;
2814 			/* Move data which follow character classes
2815 			    not to violate the data.  */
2816                         insert_space(CHAR_CLASS_SIZE,
2817 				     laststart + 6 + laststart[1],
2818 				     b - 1);
2819 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2820 				    + __alignof__(wctype_t) - 1)
2821 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2822 			/* Store the character class.  */
2823                         *((wctype_t*)alignedp) = wt;
2824                         /* Update length of char_classes */
2825                         laststart[1] += CHAR_CLASS_SIZE;
2826 
2827                         had_char_class = true;
2828                       }
2829                     else
2830                       {
2831                         c1++;
2832                         while (c1--)
2833                           PATUNFETCH;
2834                         BUF_PUSH ('[');
2835                         BUF_PUSH (':');
2836                         laststart[5] += 2; /* Update the length of characters  */
2837 			range_start = ':';
2838                         had_char_class = false;
2839                       }
2840                   }
2841                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2842 							  || *p == '.'))
2843 		  {
2844 		    CHAR_TYPE str[128];	/* Should be large enough.  */
2845 		    CHAR_TYPE delim = *p; /* '=' or '.'  */
2846 # ifdef _LIBC
2847 		    uint32_t nrules =
2848 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2849 # endif
2850 		    PATFETCH (c);
2851 		    c1 = 0;
2852 
2853 		    /* If pattern is `[[=' or '[[.'.  */
2854 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2855 
2856 		    for (;;)
2857 		      {
2858 			PATFETCH (c);
2859 			if ((c == delim && *p == ']') || p == pend)
2860 			  break;
2861 			if (c1 < sizeof (str) - 1)
2862 			  str[c1++] = c;
2863 			else
2864 			  /* This is in any case an invalid class name.  */
2865 			  str[0] = '\0';
2866                       }
2867 		    str[c1] = '\0';
2868 
2869 		    if (c == delim && *p == ']' && str[0] != '\0')
2870 		      {
2871                         unsigned int i, offset;
2872 			/* If we have no collation data we use the default
2873 			   collation in which each character is in a class
2874 			   by itself.  It also means that ASCII is the
2875 			   character set and therefore we cannot have character
2876 			   with more than one byte in the multibyte
2877 			   representation.  */
2878 
2879                         /* If not defined _LIBC, we push the name and
2880 			   `\0' for the sake of matching performance.  */
2881 			int datasize = c1 + 1;
2882 
2883 # ifdef _LIBC
2884 			int32_t idx = 0;
2885 			if (nrules == 0)
2886 # endif
2887 			  {
2888 			    if (c1 != 1)
2889 			      FREE_STACK_RETURN (REG_ECOLLATE);
2890 			  }
2891 # ifdef _LIBC
2892 			else
2893 			  {
2894 			    const int32_t *table;
2895 			    const int32_t *weights;
2896 			    const int32_t *extra;
2897 			    const int32_t *indirect;
2898 			    wint_t *cp;
2899 
2900 			    /* This #include defines a local function!  */
2901 #  include <locale/weightwc.h>
2902 
2903 			    if(delim == '=')
2904 			      {
2905 				/* We push the index for equivalence class.  */
2906 				cp = (wint_t*)str;
2907 
2908 				table = (const int32_t *)
2909 				  _NL_CURRENT (LC_COLLATE,
2910 					       _NL_COLLATE_TABLEWC);
2911 				weights = (const int32_t *)
2912 				  _NL_CURRENT (LC_COLLATE,
2913 					       _NL_COLLATE_WEIGHTWC);
2914 				extra = (const int32_t *)
2915 				  _NL_CURRENT (LC_COLLATE,
2916 					       _NL_COLLATE_EXTRAWC);
2917 				indirect = (const int32_t *)
2918 				  _NL_CURRENT (LC_COLLATE,
2919 					       _NL_COLLATE_INDIRECTWC);
2920 
2921 				idx = findidx ((const wint_t**)&cp);
2922 				if (idx == 0 || cp < (wint_t*) str + c1)
2923 				  /* This is no valid character.  */
2924 				  FREE_STACK_RETURN (REG_ECOLLATE);
2925 
2926 				str[0] = (wchar_t)idx;
2927 			      }
2928 			    else /* delim == '.' */
2929 			      {
2930 				/* We push collation sequence value
2931 				   for collating symbol.  */
2932 				int32_t table_size;
2933 				const int32_t *symb_table;
2934 				const unsigned char *extra;
2935 				int32_t idx;
2936 				int32_t elem;
2937 				int32_t second;
2938 				int32_t hash;
2939 				char char_str[c1];
2940 
2941 				/* We have to convert the name to a single-byte
2942 				   string.  This is possible since the names
2943 				   consist of ASCII characters and the internal
2944 				   representation is UCS4.  */
2945 				for (i = 0; i < c1; ++i)
2946 				  char_str[i] = str[i];
2947 
2948 				table_size =
2949 				  _NL_CURRENT_WORD (LC_COLLATE,
2950 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
2951 				symb_table = (const int32_t *)
2952 				  _NL_CURRENT (LC_COLLATE,
2953 					       _NL_COLLATE_SYMB_TABLEMB);
2954 				extra = (const unsigned char *)
2955 				  _NL_CURRENT (LC_COLLATE,
2956 					       _NL_COLLATE_SYMB_EXTRAMB);
2957 
2958 				/* Locate the character in the hashing table.  */
2959 				hash = elem_hash (char_str, c1);
2960 
2961 				idx = 0;
2962 				elem = hash % table_size;
2963 				second = hash % (table_size - 2);
2964 				while (symb_table[2 * elem] != 0)
2965 				  {
2966 				    /* First compare the hashing value.  */
2967 				    if (symb_table[2 * elem] == hash
2968 					&& c1 == extra[symb_table[2 * elem + 1]]
2969 					&& memcmp (str,
2970 						   &extra[symb_table[2 * elem + 1]
2971 							 + 1], c1) == 0)
2972 				      {
2973 					/* Yep, this is the entry.  */
2974 					idx = symb_table[2 * elem + 1];
2975 					idx += 1 + extra[idx];
2976 					break;
2977 				      }
2978 
2979 				    /* Next entry.  */
2980 				    elem += second;
2981 				  }
2982 
2983 				if (symb_table[2 * elem] != 0)
2984 				  {
2985 				    /* Compute the index of the byte sequence
2986 				       in the table.  */
2987 				    idx += 1 + extra[idx];
2988 				    /* Adjust for the alignment.  */
2989 				    idx = (idx + 3) & ~4;
2990 
2991 				    str[0] = (wchar_t) idx + 4;
2992 				  }
2993 				else if (symb_table[2 * elem] == 0 && c1 == 1)
2994 				  {
2995 				    /* No valid character.  Match it as a
2996 				       single byte character.  */
2997 				    had_char_class = false;
2998 				    BUF_PUSH(str[0]);
2999 				    /* Update the length of characters  */
3000 				    laststart[5]++;
3001 				    range_start = str[0];
3002 
3003 				    /* Throw away the ] at the end of the
3004 				       collating symbol.  */
3005 				    PATFETCH (c);
3006 				    /* Go on with the next element of the list.  */
3007 				    continue;
3008 				  }
3009 				else
3010 				  FREE_STACK_RETURN (REG_ECOLLATE);
3011 			      }
3012 			    datasize = 1;
3013 			  }
3014 # endif
3015                         /* Throw away the ] at the end of the equivalence
3016                            class (or collating symbol).  */
3017                         PATFETCH (c);
3018 
3019 			/* Allocate the space for the equivalence class
3020 			   (or collating symbol) (and '\0' if needed).  */
3021                         GET_BUFFER_SPACE(datasize);
3022 			/* Update the pointer to indicate end of buffer.  */
3023                         b += datasize;
3024 
3025 			if (delim == '=')
3026 			  { /* equivalence class  */
3027 			    /* Calculate the offset of char_ranges,
3028 			       which is next to equivalence_classes.  */
3029 			    offset = laststart[1] + laststart[2]
3030 			      + laststart[3] +6;
3031 			    /* Insert space.  */
3032 			    insert_space(datasize, laststart + offset, b - 1);
3033 
3034 			    /* Write the equivalence_class and \0.  */
3035 			    for (i = 0 ; i < datasize ; i++)
3036 			      laststart[offset + i] = str[i];
3037 
3038 			    /* Update the length of equivalence_classes.  */
3039 			    laststart[3] += datasize;
3040 			    had_char_class = true;
3041 			  }
3042 			else /* delim == '.' */
3043 			  { /* collating symbol  */
3044 			    /* Calculate the offset of the equivalence_classes,
3045 			       which is next to collating_symbols.  */
3046 			    offset = laststart[1] + laststart[2] + 6;
3047 			    /* Insert space and write the collating_symbol
3048 			       and \0.  */
3049 			    insert_space(datasize, laststart + offset, b-1);
3050 			    for (i = 0 ; i < datasize ; i++)
3051 			      laststart[offset + i] = str[i];
3052 
3053 			    /* In re_match_2_internal if range_start < -1, we
3054 			       assume -range_start is the offset of the
3055 			       collating symbol which is specified as
3056 			       the character of the range start.  So we assign
3057 			       -(laststart[1] + laststart[2] + 6) to
3058 			       range_start.  */
3059 			    range_start = -(laststart[1] + laststart[2] + 6);
3060 			    /* Update the length of collating_symbol.  */
3061 			    laststart[2] += datasize;
3062 			    had_char_class = false;
3063 			  }
3064 		      }
3065                     else
3066                       {
3067                         c1++;
3068                         while (c1--)
3069                           PATUNFETCH;
3070                         BUF_PUSH ('[');
3071                         BUF_PUSH (delim);
3072                         laststart[5] += 2; /* Update the length of characters  */
3073 			range_start = delim;
3074                         had_char_class = false;
3075                       }
3076 		  }
3077                 else
3078                   {
3079                     had_char_class = false;
3080 		    BUF_PUSH(c);
3081 		    laststart[5]++;  /* Update the length of characters  */
3082 		    range_start = c;
3083                   }
3084 	      }
3085 
3086 #else /* not MBS_SUPPORT */
3087             /* Ensure that we have enough space to push a charset: the
3088                opcode, the length count, and the bitset; 34 bytes in all.  */
3089 	    GET_BUFFER_SPACE (34);
3090 
3091             laststart = b;
3092 
3093             /* We test `*p == '^' twice, instead of using an if
3094                statement, so we only need one BUF_PUSH.  */
3095             BUF_PUSH (*p == '^' ? charset_not : charset);
3096             if (*p == '^')
3097               p++;
3098 
3099             /* Remember the first position in the bracket expression.  */
3100             p1 = p;
3101 
3102             /* Push the number of bytes in the bitmap.  */
3103             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3104 
3105             /* Clear the whole map.  */
3106             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3107 
3108             /* charset_not matches newline according to a syntax bit.  */
3109             if ((re_opcode_t) b[-2] == charset_not
3110                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3111               SET_LIST_BIT ('\n');
3112 
3113             /* Read in characters and ranges, setting map bits.  */
3114             for (;;)
3115               {
3116                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3117 
3118                 PATFETCH (c);
3119 
3120                 /* \ might escape characters inside [...] and [^...].  */
3121                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3122                   {
3123                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3124 
3125                     PATFETCH (c1);
3126                     SET_LIST_BIT (c1);
3127 		    range_start = c1;
3128                     continue;
3129                   }
3130 
3131                 /* Could be the end of the bracket expression.  If it's
3132                    not (i.e., when the bracket expression is `[]' so
3133                    far), the ']' character bit gets set way below.  */
3134                 if (c == ']' && p != p1 + 1)
3135                   break;
3136 
3137                 /* Look ahead to see if it's a range when the last thing
3138                    was a character class.  */
3139                 if (had_char_class && c == '-' && *p != ']')
3140                   FREE_STACK_RETURN (REG_ERANGE);
3141 
3142                 /* Look ahead to see if it's a range when the last thing
3143                    was a character: if this is a hyphen not at the
3144                    beginning or the end of a list, then it's the range
3145                    operator.  */
3146                 if (c == '-'
3147                     && !(p - 2 >= pattern && p[-2] == '[')
3148                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3149                     && *p != ']')
3150                   {
3151                     reg_errcode_t ret
3152                       = compile_range (range_start, &p, pend, translate,
3153 				       syntax, b);
3154                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3155 		    range_start = 0xffffffff;
3156                   }
3157 
3158                 else if (p[0] == '-' && p[1] != ']')
3159                   { /* This handles ranges made up of characters only.  */
3160                     reg_errcode_t ret;
3161 
3162 		    /* Move past the `-'.  */
3163                     PATFETCH (c1);
3164 
3165                     ret = compile_range (c, &p, pend, translate, syntax, b);
3166                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3167 		    range_start = 0xffffffff;
3168                   }
3169 
3170                 /* See if we're at the beginning of a possible character
3171                    class.  */
3172 
3173                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3174                   { /* Leave room for the null.  */
3175                     char str[CHAR_CLASS_MAX_LENGTH + 1];
3176 
3177                     PATFETCH (c);
3178                     c1 = 0;
3179 
3180                     /* If pattern is `[[:'.  */
3181                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3182 
3183                     for (;;)
3184                       {
3185                         PATFETCH (c);
3186                         if ((c == ':' && *p == ']') || p == pend)
3187                           break;
3188 			if (c1 < CHAR_CLASS_MAX_LENGTH)
3189 			  str[c1++] = c;
3190 			else
3191 			  /* This is in any case an invalid class name.  */
3192 			  str[0] = '\0';
3193                       }
3194                     str[c1] = '\0';
3195 
3196                     /* If it isn't a word bracketed by `[:' and `:]':
3197                        undo the ending character, the letters, and leave
3198                        the leading `:' and `[' (but set bits for them).  */
3199                     if (c == ':' && *p == ']')
3200                       {
3201 # if defined _LIBC || WIDE_CHAR_SUPPORT
3202                         boolean is_lower = STREQ (str, "lower");
3203                         boolean is_upper = STREQ (str, "upper");
3204 			wctype_t wt;
3205                         int ch;
3206 
3207 			wt = IS_CHAR_CLASS (str);
3208 			if (wt == 0)
3209 			  FREE_STACK_RETURN (REG_ECTYPE);
3210 
3211                         /* Throw away the ] at the end of the character
3212                            class.  */
3213                         PATFETCH (c);
3214 
3215                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3216 
3217                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3218 			  {
3219 #  ifdef _LIBC
3220 			    if (__iswctype (__btowc (ch), wt))
3221 			      SET_LIST_BIT (ch);
3222 #  else
3223 			    if (iswctype (btowc (ch), wt))
3224 			      SET_LIST_BIT (ch);
3225 #  endif
3226 
3227 			    if (translate && (is_upper || is_lower)
3228 				&& (ISUPPER (ch) || ISLOWER (ch)))
3229 			      SET_LIST_BIT (ch);
3230 			  }
3231 
3232                         had_char_class = true;
3233 # else
3234                         int ch;
3235                         boolean is_alnum = STREQ (str, "alnum");
3236                         boolean is_alpha = STREQ (str, "alpha");
3237                         boolean is_blank = STREQ (str, "blank");
3238                         boolean is_cntrl = STREQ (str, "cntrl");
3239                         boolean is_digit = STREQ (str, "digit");
3240                         boolean is_graph = STREQ (str, "graph");
3241                         boolean is_lower = STREQ (str, "lower");
3242                         boolean is_print = STREQ (str, "print");
3243                         boolean is_punct = STREQ (str, "punct");
3244                         boolean is_space = STREQ (str, "space");
3245                         boolean is_upper = STREQ (str, "upper");
3246                         boolean is_xdigit = STREQ (str, "xdigit");
3247 
3248                         if (!IS_CHAR_CLASS (str))
3249 			  FREE_STACK_RETURN (REG_ECTYPE);
3250 
3251                         /* Throw away the ] at the end of the character
3252                            class.  */
3253                         PATFETCH (c);
3254 
3255                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3256 
3257                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3258                           {
3259 			    /* This was split into 3 if's to
3260 			       avoid an arbitrary limit in some compiler.  */
3261                             if (   (is_alnum  && ISALNUM (ch))
3262                                 || (is_alpha  && ISALPHA (ch))
3263                                 || (is_blank  && ISBLANK (ch))
3264                                 || (is_cntrl  && ISCNTRL (ch)))
3265 			      SET_LIST_BIT (ch);
3266 			    if (   (is_digit  && ISDIGIT (ch))
3267                                 || (is_graph  && ISGRAPH (ch))
3268                                 || (is_lower  && ISLOWER (ch))
3269                                 || (is_print  && ISPRINT (ch)))
3270 			      SET_LIST_BIT (ch);
3271 			    if (   (is_punct  && ISPUNCT (ch))
3272                                 || (is_space  && ISSPACE (ch))
3273                                 || (is_upper  && ISUPPER (ch))
3274                                 || (is_xdigit && ISXDIGIT (ch)))
3275 			      SET_LIST_BIT (ch);
3276 			    if (   translate && (is_upper || is_lower)
3277 				&& (ISUPPER (ch) || ISLOWER (ch)))
3278 			      SET_LIST_BIT (ch);
3279                           }
3280                         had_char_class = true;
3281 # endif	/* libc || wctype.h */
3282                       }
3283                     else
3284                       {
3285                         c1++;
3286                         while (c1--)
3287                           PATUNFETCH;
3288                         SET_LIST_BIT ('[');
3289                         SET_LIST_BIT (':');
3290 			range_start = ':';
3291                         had_char_class = false;
3292                       }
3293                   }
3294                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3295 		  {
3296 		    unsigned char str[MB_LEN_MAX + 1];
3297 # ifdef _LIBC
3298 		    uint32_t nrules =
3299 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3300 # endif
3301 
3302 		    PATFETCH (c);
3303 		    c1 = 0;
3304 
3305 		    /* If pattern is `[[='.  */
3306 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3307 
3308 		    for (;;)
3309 		      {
3310 			PATFETCH (c);
3311 			if ((c == '=' && *p == ']') || p == pend)
3312 			  break;
3313 			if (c1 < MB_LEN_MAX)
3314 			  str[c1++] = c;
3315 			else
3316 			  /* This is in any case an invalid class name.  */
3317 			  str[0] = '\0';
3318                       }
3319 		    str[c1] = '\0';
3320 
3321 		    if (c == '=' && *p == ']' && str[0] != '\0')
3322 		      {
3323 			/* If we have no collation data we use the default
3324 			   collation in which each character is in a class
3325 			   by itself.  It also means that ASCII is the
3326 			   character set and therefore we cannot have character
3327 			   with more than one byte in the multibyte
3328 			   representation.  */
3329 # ifdef _LIBC
3330 			if (nrules == 0)
3331 # endif
3332 			  {
3333 			    if (c1 != 1)
3334 			      FREE_STACK_RETURN (REG_ECOLLATE);
3335 
3336 			    /* Throw away the ] at the end of the equivalence
3337 			       class.  */
3338 			    PATFETCH (c);
3339 
3340 			    /* Set the bit for the character.  */
3341 			    SET_LIST_BIT (str[0]);
3342 			  }
3343 # ifdef _LIBC
3344 			else
3345 			  {
3346 			    /* Try to match the byte sequence in `str' against
3347 			       those known to the collate implementation.
3348 			       First find out whether the bytes in `str' are
3349 			       actually from exactly one character.  */
3350 			    const int32_t *table;
3351 			    const unsigned char *weights;
3352 			    const unsigned char *extra;
3353 			    const int32_t *indirect;
3354 			    int32_t idx;
3355 			    const unsigned char *cp = str;
3356 			    int ch;
3357 
3358 			    /* This #include defines a local function!  */
3359 #  include <locale/weight.h>
3360 
3361 			    table = (const int32_t *)
3362 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3363 			    weights = (const unsigned char *)
3364 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3365 			    extra = (const unsigned char *)
3366 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3367 			    indirect = (const int32_t *)
3368 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3369 
3370 			    idx = findidx (&cp);
3371 			    if (idx == 0 || cp < str + c1)
3372 			      /* This is no valid character.  */
3373 			      FREE_STACK_RETURN (REG_ECOLLATE);
3374 
3375 			    /* Throw away the ] at the end of the equivalence
3376 			       class.  */
3377 			    PATFETCH (c);
3378 
3379 			    /* Now we have to go through the whole table
3380 			       and find all characters which have the same
3381 			       first level weight.
3382 
3383 			       XXX Note that this is not entirely correct.
3384 			       we would have to match multibyte sequences
3385 			       but this is not possible with the current
3386 			       implementation.  */
3387 			    for (ch = 1; ch < 256; ++ch)
3388 			      /* XXX This test would have to be changed if we
3389 				 would allow matching multibyte sequences.  */
3390 			      if (table[ch] > 0)
3391 				{
3392 				  int32_t idx2 = table[ch];
3393 				  size_t len = weights[idx2];
3394 
3395 				  /* Test whether the lengths match.  */
3396 				  if (weights[idx] == len)
3397 				    {
3398 				      /* They do.  Now compare the bytes of
3399 					 the weight.  */
3400 				      size_t cnt = 0;
3401 
3402 				      while (cnt < len
3403 					     && (weights[idx + 1 + cnt]
3404 						 == weights[idx2 + 1 + cnt]))
3405 					++cnt;
3406 
3407 				      if (cnt == len)
3408 					/* They match.  Mark the character as
3409 					   acceptable.  */
3410 					SET_LIST_BIT (ch);
3411 				    }
3412 				}
3413 			  }
3414 # endif
3415 			had_char_class = true;
3416 		      }
3417                     else
3418                       {
3419                         c1++;
3420                         while (c1--)
3421                           PATUNFETCH;
3422                         SET_LIST_BIT ('[');
3423                         SET_LIST_BIT ('=');
3424 			range_start = '=';
3425                         had_char_class = false;
3426                       }
3427 		  }
3428                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3429 		  {
3430 		    unsigned char str[128];	/* Should be large enough.  */
3431 # ifdef _LIBC
3432 		    uint32_t nrules =
3433 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3434 # endif
3435 
3436 		    PATFETCH (c);
3437 		    c1 = 0;
3438 
3439 		    /* If pattern is `[[.'.  */
3440 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3441 
3442 		    for (;;)
3443 		      {
3444 			PATFETCH (c);
3445 			if ((c == '.' && *p == ']') || p == pend)
3446 			  break;
3447 			if (c1 < sizeof (str))
3448 			  str[c1++] = c;
3449 			else
3450 			  /* This is in any case an invalid class name.  */
3451 			  str[0] = '\0';
3452                       }
3453 		    str[c1] = '\0';
3454 
3455 		    if (c == '.' && *p == ']' && str[0] != '\0')
3456 		      {
3457 			/* If we have no collation data we use the default
3458 			   collation in which each character is the name
3459 			   for its own class which contains only the one
3460 			   character.  It also means that ASCII is the
3461 			   character set and therefore we cannot have character
3462 			   with more than one byte in the multibyte
3463 			   representation.  */
3464 # ifdef _LIBC
3465 			if (nrules == 0)
3466 # endif
3467 			  {
3468 			    if (c1 != 1)
3469 			      FREE_STACK_RETURN (REG_ECOLLATE);
3470 
3471 			    /* Throw away the ] at the end of the equivalence
3472 			       class.  */
3473 			    PATFETCH (c);
3474 
3475 			    /* Set the bit for the character.  */
3476 			    SET_LIST_BIT (str[0]);
3477 			    range_start = ((const unsigned char *) str)[0];
3478 			  }
3479 # ifdef _LIBC
3480 			else
3481 			  {
3482 			    /* Try to match the byte sequence in `str' against
3483 			       those known to the collate implementation.
3484 			       First find out whether the bytes in `str' are
3485 			       actually from exactly one character.  */
3486 			    int32_t table_size;
3487 			    const int32_t *symb_table;
3488 			    const unsigned char *extra;
3489 			    int32_t idx;
3490 			    int32_t elem;
3491 			    int32_t second;
3492 			    int32_t hash;
3493 
3494 			    table_size =
3495 			      _NL_CURRENT_WORD (LC_COLLATE,
3496 						_NL_COLLATE_SYMB_HASH_SIZEMB);
3497 			    symb_table = (const int32_t *)
3498 			      _NL_CURRENT (LC_COLLATE,
3499 					   _NL_COLLATE_SYMB_TABLEMB);
3500 			    extra = (const unsigned char *)
3501 			      _NL_CURRENT (LC_COLLATE,
3502 					   _NL_COLLATE_SYMB_EXTRAMB);
3503 
3504 			    /* Locate the character in the hashing table.  */
3505 			    hash = elem_hash (str, c1);
3506 
3507 			    idx = 0;
3508 			    elem = hash % table_size;
3509 			    second = hash % (table_size - 2);
3510 			    while (symb_table[2 * elem] != 0)
3511 			      {
3512 				/* First compare the hashing value.  */
3513 				if (symb_table[2 * elem] == hash
3514 				    && c1 == extra[symb_table[2 * elem + 1]]
3515 				    && memcmp (str,
3516 					       &extra[symb_table[2 * elem + 1]
3517 						     + 1],
3518 					       c1) == 0)
3519 				  {
3520 				    /* Yep, this is the entry.  */
3521 				    idx = symb_table[2 * elem + 1];
3522 				    idx += 1 + extra[idx];
3523 				    break;
3524 				  }
3525 
3526 				/* Next entry.  */
3527 				elem += second;
3528 			      }
3529 
3530 			    if (symb_table[2 * elem] == 0)
3531 			      /* This is no valid character.  */
3532 			      FREE_STACK_RETURN (REG_ECOLLATE);
3533 
3534 			    /* Throw away the ] at the end of the equivalence
3535 			       class.  */
3536 			    PATFETCH (c);
3537 
3538 			    /* Now add the multibyte character(s) we found
3539 			       to the accept list.
3540 
3541 			       XXX Note that this is not entirely correct.
3542 			       we would have to match multibyte sequences
3543 			       but this is not possible with the current
3544 			       implementation.  Also, we have to match
3545 			       collating symbols, which expand to more than
3546 			       one file, as a whole and not allow the
3547 			       individual bytes.  */
3548 			    c1 = extra[idx++];
3549 			    if (c1 == 1)
3550 			      range_start = extra[idx];
3551 			    while (c1-- > 0)
3552 			      {
3553 				SET_LIST_BIT (extra[idx]);
3554 				++idx;
3555 			      }
3556 			  }
3557 # endif
3558 			had_char_class = false;
3559 		      }
3560                     else
3561                       {
3562                         c1++;
3563                         while (c1--)
3564                           PATUNFETCH;
3565                         SET_LIST_BIT ('[');
3566                         SET_LIST_BIT ('.');
3567 			range_start = '.';
3568                         had_char_class = false;
3569                       }
3570 		  }
3571                 else
3572                   {
3573                     had_char_class = false;
3574                     SET_LIST_BIT (c);
3575 		    range_start = c;
3576                   }
3577               }
3578 
3579             /* Discard any (non)matching list bytes that are all 0 at the
3580                end of the map.  Decrease the map-length byte too.  */
3581             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3582               b[-1]--;
3583             b += b[-1];
3584 #endif /* MBS_SUPPORT */
3585           }
3586           break;
3587 
3588 
3589 	case '(':
3590           if (syntax & RE_NO_BK_PARENS)
3591             goto handle_open;
3592           else
3593             goto normal_char;
3594 
3595 
3596         case ')':
3597           if (syntax & RE_NO_BK_PARENS)
3598             goto handle_close;
3599           else
3600             goto normal_char;
3601 
3602 
3603         case '\n':
3604           if (syntax & RE_NEWLINE_ALT)
3605             goto handle_alt;
3606           else
3607             goto normal_char;
3608 
3609 
3610 	case '|':
3611           if (syntax & RE_NO_BK_VBAR)
3612             goto handle_alt;
3613           else
3614             goto normal_char;
3615 
3616 
3617         case '{':
3618            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3619              goto handle_interval;
3620            else
3621              goto normal_char;
3622 
3623 
3624         case '\\':
3625           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3626 
3627           /* Do not translate the character after the \, so that we can
3628              distinguish, e.g., \B from \b, even if we normally would
3629              translate, e.g., B to b.  */
3630           PATFETCH_RAW (c);
3631 
3632           switch (c)
3633             {
3634             case '(':
3635               if (syntax & RE_NO_BK_PARENS)
3636                 goto normal_backslash;
3637 
3638             handle_open:
3639               bufp->re_nsub++;
3640               regnum++;
3641 
3642               if (COMPILE_STACK_FULL)
3643                 {
3644                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
3645                             compile_stack_elt_t);
3646                   if (compile_stack.stack == NULL) return REG_ESPACE;
3647 
3648                   compile_stack.size <<= 1;
3649                 }
3650 
3651               /* These are the values to restore when we hit end of this
3652                  group.  They are all relative offsets, so that if the
3653                  whole pattern moves because of realloc, they will still
3654                  be valid.  */
3655               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3656               COMPILE_STACK_TOP.fixup_alt_jump
3657                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3658               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3659               COMPILE_STACK_TOP.regnum = regnum;
3660 
3661               /* We will eventually replace the 0 with the number of
3662                  groups inner to this one.  But do not push a
3663                  start_memory for groups beyond the last one we can
3664                  represent in the compiled pattern.  */
3665               if (regnum <= MAX_REGNUM)
3666                 {
3667                   COMPILE_STACK_TOP.inner_group_offset = b
3668 		    - COMPILED_BUFFER_VAR + 2;
3669                   BUF_PUSH_3 (start_memory, regnum, 0);
3670                 }
3671 
3672               compile_stack.avail++;
3673 
3674               fixup_alt_jump = 0;
3675               laststart = 0;
3676               begalt = b;
3677 	      /* If we've reached MAX_REGNUM groups, then this open
3678 		 won't actually generate any code, so we'll have to
3679 		 clear pending_exact explicitly.  */
3680 	      pending_exact = 0;
3681               break;
3682 
3683 
3684             case ')':
3685               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3686 
3687               if (COMPILE_STACK_EMPTY)
3688 		{
3689 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3690 		    goto normal_backslash;
3691 		  else
3692 		    FREE_STACK_RETURN (REG_ERPAREN);
3693 		}
3694 
3695             handle_close:
3696               if (fixup_alt_jump)
3697                 { /* Push a dummy failure point at the end of the
3698                      alternative for a possible future
3699                      `pop_failure_jump' to pop.  See comments at
3700                      `push_dummy_failure' in `re_match_2'.  */
3701                   BUF_PUSH (push_dummy_failure);
3702 
3703                   /* We allocated space for this jump when we assigned
3704                      to `fixup_alt_jump', in the `handle_alt' case below.  */
3705                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3706                 }
3707 
3708               /* See similar code for backslashed left paren above.  */
3709               if (COMPILE_STACK_EMPTY)
3710 		{
3711 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3712 		    goto normal_char;
3713 		  else
3714 		    FREE_STACK_RETURN (REG_ERPAREN);
3715 		}
3716 
3717               /* Since we just checked for an empty stack above, this
3718                  ``can't happen''.  */
3719               assert (compile_stack.avail != 0);
3720               {
3721                 /* We don't just want to restore into `regnum', because
3722                    later groups should continue to be numbered higher,
3723                    as in `(ab)c(de)' -- the second group is #2.  */
3724                 regnum_t this_group_regnum;
3725 
3726                 compile_stack.avail--;
3727                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3728                 fixup_alt_jump
3729                   = COMPILE_STACK_TOP.fixup_alt_jump
3730                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3731                     : 0;
3732                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3733                 this_group_regnum = COMPILE_STACK_TOP.regnum;
3734 		/* If we've reached MAX_REGNUM groups, then this open
3735 		   won't actually generate any code, so we'll have to
3736 		   clear pending_exact explicitly.  */
3737 		pending_exact = 0;
3738 
3739                 /* We're at the end of the group, so now we know how many
3740                    groups were inside this one.  */
3741                 if (this_group_regnum <= MAX_REGNUM)
3742                   {
3743 		    US_CHAR_TYPE *inner_group_loc
3744                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3745 
3746                     *inner_group_loc = regnum - this_group_regnum;
3747                     BUF_PUSH_3 (stop_memory, this_group_regnum,
3748                                 regnum - this_group_regnum);
3749                   }
3750               }
3751               break;
3752 
3753 
3754             case '|':					/* `\|'.  */
3755               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3756                 goto normal_backslash;
3757             handle_alt:
3758               if (syntax & RE_LIMITED_OPS)
3759                 goto normal_char;
3760 
3761               /* Insert before the previous alternative a jump which
3762                  jumps to this alternative if the former fails.  */
3763               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3764               INSERT_JUMP (on_failure_jump, begalt,
3765 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3766               pending_exact = 0;
3767               b += 1 + OFFSET_ADDRESS_SIZE;
3768 
3769               /* The alternative before this one has a jump after it
3770                  which gets executed if it gets matched.  Adjust that
3771                  jump so it will jump to this alternative's analogous
3772                  jump (put in below, which in turn will jump to the next
3773                  (if any) alternative's such jump, etc.).  The last such
3774                  jump jumps to the correct final destination.  A picture:
3775                           _____ _____
3776                           |   | |   |
3777                           |   v |   v
3778                          a | b   | c
3779 
3780                  If we are at `b', then fixup_alt_jump right now points to a
3781                  three-byte space after `a'.  We'll put in the jump, set
3782                  fixup_alt_jump to right after `b', and leave behind three
3783                  bytes which we'll fill in when we get to after `c'.  */
3784 
3785               if (fixup_alt_jump)
3786                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3787 
3788               /* Mark and leave space for a jump after this alternative,
3789                  to be filled in later either by next alternative or
3790                  when know we're at the end of a series of alternatives.  */
3791               fixup_alt_jump = b;
3792               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3793               b += 1 + OFFSET_ADDRESS_SIZE;
3794 
3795               laststart = 0;
3796               begalt = b;
3797               break;
3798 
3799 
3800             case '{':
3801               /* If \{ is a literal.  */
3802               if (!(syntax & RE_INTERVALS)
3803                      /* If we're at `\{' and it's not the open-interval
3804                         operator.  */
3805 		  || (syntax & RE_NO_BK_BRACES))
3806                 goto normal_backslash;
3807 
3808             handle_interval:
3809               {
3810                 /* If got here, then the syntax allows intervals.  */
3811 
3812                 /* At least (most) this many matches must be made.  */
3813                 int lower_bound = -1, upper_bound = -1;
3814 
3815 		/* Place in the uncompiled pattern (i.e., just after
3816 		   the '{') to go back to if the interval is invalid.  */
3817 		const CHAR_TYPE *beg_interval = p;
3818 
3819                 if (p == pend)
3820 		  goto invalid_interval;
3821 
3822                 GET_UNSIGNED_NUMBER (lower_bound);
3823 
3824                 if (c == ',')
3825                   {
3826                     GET_UNSIGNED_NUMBER (upper_bound);
3827 		    if (upper_bound < 0)
3828 		      upper_bound = RE_DUP_MAX;
3829                   }
3830                 else
3831                   /* Interval such as `{1}' => match exactly once. */
3832                   upper_bound = lower_bound;
3833 
3834                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
3835 		  goto invalid_interval;
3836 
3837                 if (!(syntax & RE_NO_BK_BRACES))
3838                   {
3839 		    if (c != '\\' || p == pend)
3840 		      goto invalid_interval;
3841                     PATFETCH (c);
3842                   }
3843 
3844                 if (c != '}')
3845 		  goto invalid_interval;
3846 
3847                 /* If it's invalid to have no preceding re.  */
3848                 if (!laststart)
3849                   {
3850 		    if (syntax & RE_CONTEXT_INVALID_OPS
3851 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3852                       FREE_STACK_RETURN (REG_BADRPT);
3853                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3854                       laststart = b;
3855                     else
3856                       goto unfetch_interval;
3857                   }
3858 
3859                 /* We just parsed a valid interval.  */
3860 
3861                 if (RE_DUP_MAX < upper_bound)
3862 		  FREE_STACK_RETURN (REG_BADBR);
3863 
3864                 /* If the upper bound is zero, don't want to succeed at
3865                    all; jump from `laststart' to `b + 3', which will be
3866 		   the end of the buffer after we insert the jump.  */
3867 		/* ifdef MBS_SUPPORT, 'b + 1 + OFFSET_ADDRESS_SIZE'
3868 		   instead of 'b + 3'.  */
3869                  if (upper_bound == 0)
3870                    {
3871                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3872                      INSERT_JUMP (jump, laststart, b + 1
3873 				  + OFFSET_ADDRESS_SIZE);
3874                      b += 1 + OFFSET_ADDRESS_SIZE;
3875                    }
3876 
3877                  /* Otherwise, we have a nontrivial interval.  When
3878                     we're all done, the pattern will look like:
3879                       set_number_at <jump count> <upper bound>
3880                       set_number_at <succeed_n count> <lower bound>
3881                       succeed_n <after jump addr> <succeed_n count>
3882                       <body of loop>
3883                       jump_n <succeed_n addr> <jump count>
3884                     (The upper bound and `jump_n' are omitted if
3885                     `upper_bound' is 1, though.)  */
3886                  else
3887                    { /* If the upper bound is > 1, we need to insert
3888                         more at the end of the loop.  */
3889                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3890 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3891 
3892                      GET_BUFFER_SPACE (nbytes);
3893 
3894                      /* Initialize lower bound of the `succeed_n', even
3895                         though it will be set during matching by its
3896                         attendant `set_number_at' (inserted next),
3897                         because `re_compile_fastmap' needs to know.
3898                         Jump to the `jump_n' we might insert below.  */
3899                      INSERT_JUMP2 (succeed_n, laststart,
3900                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
3901 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3902 				   , lower_bound);
3903                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3904 
3905                      /* Code to initialize the lower bound.  Insert
3906                         before the `succeed_n'.  The `5' is the last two
3907                         bytes of this `set_number_at', plus 3 bytes of
3908                         the following `succeed_n'.  */
3909 		     /* ifdef MBS_SUPPORT, The '1+2*OFFSET_ADDRESS_SIZE'
3910 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
3911 			of the following `succeed_n'.  */
3912                      insert_op2 (set_number_at, laststart, 1
3913 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
3914                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3915 
3916                      if (upper_bound > 1)
3917                        { /* More than one repetition is allowed, so
3918                             append a backward jump to the `succeed_n'
3919                             that starts this interval.
3920 
3921                             When we've reached this during matching,
3922                             we'll have matched the interval once, so
3923                             jump back only `upper_bound - 1' times.  */
3924                          STORE_JUMP2 (jump_n, b, laststart
3925 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
3926                                       upper_bound - 1);
3927                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3928 
3929                          /* The location we want to set is the second
3930                             parameter of the `jump_n'; that is `b-2' as
3931                             an absolute address.  `laststart' will be
3932                             the `set_number_at' we're about to insert;
3933                             `laststart+3' the number to set, the source
3934                             for the relative address.  But we are
3935                             inserting into the middle of the pattern --
3936                             so everything is getting moved up by 5.
3937                             Conclusion: (b - 2) - (laststart + 3) + 5,
3938                             i.e., b - laststart.
3939 
3940                             We insert this at the beginning of the loop
3941                             so that if we fail during matching, we'll
3942                             reinitialize the bounds.  */
3943                          insert_op2 (set_number_at, laststart, b - laststart,
3944                                      upper_bound - 1, b);
3945                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3946                        }
3947                    }
3948                 pending_exact = 0;
3949 		break;
3950 
3951 	      invalid_interval:
3952 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
3953 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
3954 	      unfetch_interval:
3955 		/* Match the characters as literals.  */
3956 		p = beg_interval;
3957 		c = '{';
3958 		if (syntax & RE_NO_BK_BRACES)
3959 		  goto normal_char;
3960 		else
3961 		  goto normal_backslash;
3962 	      }
3963 
3964 #ifdef emacs
3965             /* There is no way to specify the before_dot and after_dot
3966                operators.  rms says this is ok.  --karl  */
3967             case '=':
3968               BUF_PUSH (at_dot);
3969               break;
3970 
3971             case 's':
3972               laststart = b;
3973               PATFETCH (c);
3974               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3975               break;
3976 
3977             case 'S':
3978               laststart = b;
3979               PATFETCH (c);
3980               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3981               break;
3982 #endif /* emacs */
3983 
3984 
3985             case 'w':
3986 	      if (syntax & RE_NO_GNU_OPS)
3987 		goto normal_char;
3988               laststart = b;
3989               BUF_PUSH (wordchar);
3990               break;
3991 
3992 
3993             case 'W':
3994 	      if (syntax & RE_NO_GNU_OPS)
3995 		goto normal_char;
3996               laststart = b;
3997               BUF_PUSH (notwordchar);
3998               break;
3999 
4000 
4001             case '<':
4002 	      if (syntax & RE_NO_GNU_OPS)
4003 		goto normal_char;
4004               BUF_PUSH (wordbeg);
4005               break;
4006 
4007             case '>':
4008 	      if (syntax & RE_NO_GNU_OPS)
4009 		goto normal_char;
4010               BUF_PUSH (wordend);
4011               break;
4012 
4013             case 'b':
4014 	      if (syntax & RE_NO_GNU_OPS)
4015 		goto normal_char;
4016               BUF_PUSH (wordbound);
4017               break;
4018 
4019             case 'B':
4020 	      if (syntax & RE_NO_GNU_OPS)
4021 		goto normal_char;
4022               BUF_PUSH (notwordbound);
4023               break;
4024 
4025             case '`':
4026 	      if (syntax & RE_NO_GNU_OPS)
4027 		goto normal_char;
4028               BUF_PUSH (begbuf);
4029               break;
4030 
4031             case '\'':
4032 	      if (syntax & RE_NO_GNU_OPS)
4033 		goto normal_char;
4034               BUF_PUSH (endbuf);
4035               break;
4036 
4037             case '1': case '2': case '3': case '4': case '5':
4038             case '6': case '7': case '8': case '9':
4039               if (syntax & RE_NO_BK_REFS)
4040                 goto normal_char;
4041 
4042               c1 = c - '0';
4043 
4044               if (c1 > regnum)
4045                 FREE_STACK_RETURN (REG_ESUBREG);
4046 
4047               /* Can't back reference to a subexpression if inside of it.  */
4048               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4049                 goto normal_char;
4050 
4051               laststart = b;
4052               BUF_PUSH_2 (duplicate, c1);
4053               break;
4054 
4055 
4056             case '+':
4057             case '?':
4058               if (syntax & RE_BK_PLUS_QM)
4059                 goto handle_plus;
4060               else
4061                 goto normal_backslash;
4062 
4063             default:
4064             normal_backslash:
4065               /* You might think it would be useful for \ to mean
4066                  not to translate; but if we don't translate it
4067                  it will never match anything.  */
4068               c = TRANSLATE (c);
4069               goto normal_char;
4070             }
4071           break;
4072 
4073 
4074 	default:
4075         /* Expects the character in `c'.  */
4076 	normal_char:
4077 	      /* If no exactn currently being built.  */
4078           if (!pending_exact
4079 #ifdef MBS_SUPPORT
4080 	      /* If last exactn handle binary(or character) and
4081 		 new exactn handle character(or binary).  */
4082 	      || is_exactn_bin != is_binary[p - 1 - pattern]
4083 #endif /* MBS_SUPPORT */
4084 
4085               /* If last exactn not at current position.  */
4086               || pending_exact + *pending_exact + 1 != b
4087 
4088               /* We have only one byte following the exactn for the count.  */
4089 	      || *pending_exact == (1 << BYTEWIDTH) - 1
4090 
4091               /* If followed by a repetition operator.  */
4092               || *p == '*' || *p == '^'
4093 	      || ((syntax & RE_BK_PLUS_QM)
4094 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4095 		  : (*p == '+' || *p == '?'))
4096 	      || ((syntax & RE_INTERVALS)
4097                   && ((syntax & RE_NO_BK_BRACES)
4098 		      ? *p == '{'
4099                       : (p[0] == '\\' && p[1] == '{'))))
4100 	    {
4101 	      /* Start building a new exactn.  */
4102 
4103               laststart = b;
4104 
4105 #ifdef MBS_SUPPORT
4106 	      /* Is this exactn binary data or character? */
4107 	      is_exactn_bin = is_binary[p - 1 - pattern];
4108 	      if (is_exactn_bin)
4109 		  BUF_PUSH_2 (exactn_bin, 0);
4110 	      else
4111 		  BUF_PUSH_2 (exactn, 0);
4112 #else
4113 	      BUF_PUSH_2 (exactn, 0);
4114 #endif /* MBS_SUPPORT */
4115 	      pending_exact = b - 1;
4116             }
4117 
4118 	  BUF_PUSH (c);
4119           (*pending_exact)++;
4120 	  break;
4121         } /* switch (c) */
4122     } /* while p != pend */
4123 
4124 
4125   /* Through the pattern now.  */
4126 
4127   if (fixup_alt_jump)
4128     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4129 
4130   if (!COMPILE_STACK_EMPTY)
4131     FREE_STACK_RETURN (REG_EPAREN);
4132 
4133   /* If we don't want backtracking, force success
4134      the first time we reach the end of the compiled pattern.  */
4135   if (syntax & RE_NO_POSIX_BACKTRACKING)
4136     BUF_PUSH (succeed);
4137 
4138 #ifdef MBS_SUPPORT
4139   free (pattern);
4140   free (mbs_offset);
4141   free (is_binary);
4142 #endif
4143   free (compile_stack.stack);
4144 
4145   /* We have succeeded; set the length of the buffer.  */
4146 #ifdef MBS_SUPPORT
4147   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4148 #else
4149   bufp->used = b - bufp->buffer;
4150 #endif
4151 
4152 #ifdef DEBUG
4153   if (debug)
4154     {
4155       DEBUG_PRINT1 ("\nCompiled pattern: \n");
4156       print_compiled_pattern (bufp);
4157     }
4158 #endif /* DEBUG */
4159 
4160 #ifndef MATCH_MAY_ALLOCATE
4161   /* Initialize the failure stack to the largest possible stack.  This
4162      isn't necessary unless we're trying to avoid calling alloca in
4163      the search and match routines.  */
4164   {
4165     int num_regs = bufp->re_nsub + 1;
4166 
4167     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4168        is strictly greater than re_max_failures, the largest possible stack
4169        is 2 * re_max_failures failure points.  */
4170     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4171       {
4172 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4173 
4174 # ifdef emacs
4175 	if (! fail_stack.stack)
4176 	  fail_stack.stack
4177 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
4178 					    * sizeof (fail_stack_elt_t));
4179 	else
4180 	  fail_stack.stack
4181 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
4182 					     (fail_stack.size
4183 					      * sizeof (fail_stack_elt_t)));
4184 # else /* not emacs */
4185 	if (! fail_stack.stack)
4186 	  fail_stack.stack
4187 	    = (fail_stack_elt_t *) malloc (fail_stack.size
4188 					   * sizeof (fail_stack_elt_t));
4189 	else
4190 	  fail_stack.stack
4191 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
4192 					    (fail_stack.size
4193 					     * sizeof (fail_stack_elt_t)));
4194 # endif /* not emacs */
4195       }
4196 
4197     regex_grow_registers (num_regs);
4198   }
4199 #endif /* not MATCH_MAY_ALLOCATE */
4200 
4201   return REG_NOERROR;
4202 } /* regex_compile */
4203 
4204 /* Subroutines for `regex_compile'.  */
4205 
4206 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
4207 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
4208 
4209 static void
4210 store_op1 (op, loc, arg)
4211     re_opcode_t op;
4212     US_CHAR_TYPE *loc;
4213     int arg;
4214 {
4215   *loc = (US_CHAR_TYPE) op;
4216   STORE_NUMBER (loc + 1, arg);
4217 }
4218 
4219 
4220 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4221 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
4222 
4223 static void
4224 store_op2 (op, loc, arg1, arg2)
4225     re_opcode_t op;
4226     US_CHAR_TYPE *loc;
4227     int arg1, arg2;
4228 {
4229   *loc = (US_CHAR_TYPE) op;
4230   STORE_NUMBER (loc + 1, arg1);
4231   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4232 }
4233 
4234 
4235 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
4236    for OP followed by two-byte integer parameter ARG.  */
4237 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
4238 
4239 static void
4240 insert_op1 (op, loc, arg, end)
4241     re_opcode_t op;
4242     US_CHAR_TYPE *loc;
4243     int arg;
4244     US_CHAR_TYPE *end;
4245 {
4246   register US_CHAR_TYPE *pfrom = end;
4247   register US_CHAR_TYPE *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4248 
4249   while (pfrom != loc)
4250     *--pto = *--pfrom;
4251 
4252   store_op1 (op, loc, arg);
4253 }
4254 
4255 
4256 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4257 /* ifdef MBS_SUPPORT, integer parameter is 1 wchar_t.  */
4258 
4259 static void
4260 insert_op2 (op, loc, arg1, arg2, end)
4261     re_opcode_t op;
4262     US_CHAR_TYPE *loc;
4263     int arg1, arg2;
4264     US_CHAR_TYPE *end;
4265 {
4266   register US_CHAR_TYPE *pfrom = end;
4267   register US_CHAR_TYPE *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4268 
4269   while (pfrom != loc)
4270     *--pto = *--pfrom;
4271 
4272   store_op2 (op, loc, arg1, arg2);
4273 }
4274 
4275 
4276 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4277    after an alternative or a begin-subexpression.  We assume there is at
4278    least one character before the ^.  */
4279 
4280 static boolean
4281 at_begline_loc_p (pattern, p, syntax)
4282     const CHAR_TYPE *pattern, *p;
4283     reg_syntax_t syntax;
4284 {
4285   const CHAR_TYPE *prev = p - 2;
4286   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4287 
4288   return
4289        /* After a subexpression?  */
4290        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4291        /* After an alternative?  */
4292     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4293 }
4294 
4295 
4296 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4297    at least one character after the $, i.e., `P < PEND'.  */
4298 
4299 static boolean
4300 at_endline_loc_p (p, pend, syntax)
4301     const CHAR_TYPE *p, *pend;
4302     reg_syntax_t syntax;
4303 {
4304   const CHAR_TYPE *next = p;
4305   boolean next_backslash = *next == '\\';
4306   const CHAR_TYPE *next_next = p + 1 < pend ? p + 1 : 0;
4307 
4308   return
4309        /* Before a subexpression?  */
4310        (syntax & RE_NO_BK_PARENS ? *next == ')'
4311         : next_backslash && next_next && *next_next == ')')
4312        /* Before an alternative?  */
4313     || (syntax & RE_NO_BK_VBAR ? *next == '|'
4314         : next_backslash && next_next && *next_next == '|');
4315 }
4316 
4317 
4318 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4319    false if it's not.  */
4320 
4321 static boolean
4322 group_in_compile_stack (compile_stack, regnum)
4323     compile_stack_type compile_stack;
4324     regnum_t regnum;
4325 {
4326   int this_element;
4327 
4328   for (this_element = compile_stack.avail - 1;
4329        this_element >= 0;
4330        this_element--)
4331     if (compile_stack.stack[this_element].regnum == regnum)
4332       return true;
4333 
4334   return false;
4335 }
4336 
4337 #ifdef MBS_SUPPORT
4338 /* This insert space, which size is "num", into the pattern at "loc".
4339    "end" must point the end of the allocated buffer.  */
4340 static void
4341 insert_space (num, loc, end)
4342      int num;
4343      CHAR_TYPE *loc;
4344      CHAR_TYPE *end;
4345 {
4346   register CHAR_TYPE *pto = end;
4347   register CHAR_TYPE *pfrom = end - num;
4348 
4349   while (pfrom >= loc)
4350     *pto-- = *pfrom--;
4351 }
4352 #endif /* MBS_SUPPORT */
4353 
4354 #ifdef MBS_SUPPORT
4355 static reg_errcode_t
4356 compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
4357 	       char_set)
4358      CHAR_TYPE range_start_char;
4359      const CHAR_TYPE **p_ptr, *pend;
4360      CHAR_TYPE *char_set, *b;
4361      RE_TRANSLATE_TYPE translate;
4362      reg_syntax_t syntax;
4363 {
4364   const CHAR_TYPE *p = *p_ptr;
4365   CHAR_TYPE range_start, range_end;
4366   reg_errcode_t ret;
4367 # ifdef _LIBC
4368   uint32_t nrules;
4369   uint32_t start_val, end_val;
4370 # endif
4371   if (p == pend)
4372     return REG_ERANGE;
4373 
4374 # ifdef _LIBC
4375   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4376   if (nrules != 0)
4377     {
4378       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4379 						       _NL_COLLATE_COLLSEQWC);
4380       const unsigned char *extra = (const unsigned char *)
4381 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4382 
4383       if (range_start_char < -1)
4384 	{
4385 	  /* range_start is a collating symbol.  */
4386 	  int32_t *wextra;
4387 	  /* Retreive the index and get collation sequence value.  */
4388 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4389 	  start_val = wextra[1 + *wextra];
4390 	}
4391       else
4392 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4393 
4394       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4395 
4396       /* Report an error if the range is empty and the syntax prohibits
4397 	 this.  */
4398       ret = ((syntax & RE_NO_EMPTY_RANGES)
4399 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4400 
4401       /* Insert space to the end of the char_ranges.  */
4402       insert_space(2, b - char_set[5] - 2, b - 1);
4403       *(b - char_set[5] - 2) = (wchar_t)start_val;
4404       *(b - char_set[5] - 1) = (wchar_t)end_val;
4405       char_set[4]++; /* ranges_index */
4406     }
4407   else
4408 # endif
4409     {
4410       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4411 	range_start_char;
4412       range_end = TRANSLATE (p[0]);
4413       /* Report an error if the range is empty and the syntax prohibits
4414 	 this.  */
4415       ret = ((syntax & RE_NO_EMPTY_RANGES)
4416 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4417 
4418       /* Insert space to the end of the char_ranges.  */
4419       insert_space(2, b - char_set[5] - 2, b - 1);
4420       *(b - char_set[5] - 2) = range_start;
4421       *(b - char_set[5] - 1) = range_end;
4422       char_set[4]++; /* ranges_index */
4423     }
4424   /* Have to increment the pointer into the pattern string, so the
4425      caller isn't still at the ending character.  */
4426   (*p_ptr)++;
4427 
4428   return ret;
4429 }
4430 #else
4431 /* Read the ending character of a range (in a bracket expression) from the
4432    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4433    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4434    Then we set the translation of all bits between the starting and
4435    ending characters (inclusive) in the compiled pattern B.
4436 
4437    Return an error code.
4438 
4439    We use these short variable names so we can use the same macros as
4440    `regex_compile' itself.  */
4441 
4442 static reg_errcode_t
4443 compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
4444      unsigned int range_start_char;
4445      const char **p_ptr, *pend;
4446      RE_TRANSLATE_TYPE translate;
4447      reg_syntax_t syntax;
4448      unsigned char *b;
4449 {
4450   unsigned this_char;
4451   const char *p = *p_ptr;
4452   reg_errcode_t ret;
4453 # if _LIBC
4454   const unsigned char *collseq;
4455   unsigned int start_colseq;
4456   unsigned int end_colseq;
4457 # else
4458   unsigned end_char;
4459 # endif
4460 
4461   if (p == pend)
4462     return REG_ERANGE;
4463 
4464   /* Have to increment the pointer into the pattern string, so the
4465      caller isn't still at the ending character.  */
4466   (*p_ptr)++;
4467 
4468   /* Report an error if the range is empty and the syntax prohibits this.  */
4469   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4470 
4471 # if _LIBC
4472   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4473 						 _NL_COLLATE_COLLSEQMB);
4474 
4475   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4476   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4477   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4478     {
4479       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4480 
4481       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4482 	{
4483 	  SET_LIST_BIT (TRANSLATE (this_char));
4484 	  ret = REG_NOERROR;
4485 	}
4486     }
4487 # else
4488   /* Here we see why `this_char' has to be larger than an `unsigned
4489      char' -- we would otherwise go into an infinite loop, since all
4490      characters <= 0xff.  */
4491   range_start_char = TRANSLATE (range_start_char);
4492   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4493      and some compilers cast it to int implicitly, so following for_loop
4494      may fall to (almost) infinite loop.
4495      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4496      To avoid this, we cast p[0] to unsigned int and truncate it.  */
4497   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4498 
4499   for (this_char = range_start_char; this_char <= end_char; ++this_char)
4500     {
4501       SET_LIST_BIT (TRANSLATE (this_char));
4502       ret = REG_NOERROR;
4503     }
4504 # endif
4505 
4506   return ret;
4507 }
4508 #endif /* MBS_SUPPORT */
4509 
4510 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4511    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4512    characters can start a string that matches the pattern.  This fastmap
4513    is used by re_search to skip quickly over impossible starting points.
4514 
4515    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4516    area as BUFP->fastmap.
4517 
4518    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4519    the pattern buffer.
4520 
4521    Returns 0 if we succeed, -2 if an internal error.   */
4522 
4523 #ifdef MBS_SUPPORT
4524 /* local function for re_compile_fastmap.
4525    truncate wchar_t character to char.  */
4526 static unsigned char truncate_wchar (CHAR_TYPE c);
4527 
4528 static unsigned char
4529 truncate_wchar (c)
4530      CHAR_TYPE c;
4531 {
4532   unsigned char buf[MB_LEN_MAX];
4533   int retval = wctomb(buf, c);
4534   return retval > 0 ? buf[0] : (unsigned char)c;
4535 }
4536 #endif /* MBS_SUPPORT */
4537 
/* Build the fastmap for the compiled pattern in BUFP.

   The compiled opcodes between BUFP->buffer and BUFP->buffer +
   BUFP->used are walked one path at a time.  For each path the byte
   values that can start a match are marked in BUFP->fastmap; the
   alternative targets of on_failure_jump style opcodes are pushed on a
   local failure stack and explored afterwards.

   On return BUFP->fastmap_accurate is 1 and BUFP->can_be_null tells
   whether some path can match the empty string (it is also forced to 1
   for constructs this function does not analyse, see below).

   Returns 0 on success and -2 if pushing on the failure stack fails.  */
4538 int
4539 re_compile_fastmap (bufp)
4540      struct re_pattern_buffer *bufp;
4541 {
4542   int j, k;
4543 #ifdef MATCH_MAY_ALLOCATE
4544   fail_stack_type fail_stack;
4545 #endif
4546 #ifndef REGEX_MALLOC
4547   char *destination;
4548 #endif
4549 
4550   register char *fastmap = bufp->fastmap;
4551 
4552 #ifdef MBS_SUPPORT
4553   /* We need to cast pattern to (wchar_t*), because we casted this compiled
4554      pattern to (char*) in regex_compile.  */
4555   US_CHAR_TYPE *pattern = (US_CHAR_TYPE*)bufp->buffer;
4556   register US_CHAR_TYPE *pend = (US_CHAR_TYPE*) (bufp->buffer + bufp->used);
4557 #else
4558   US_CHAR_TYPE *pattern = bufp->buffer;
4559   register US_CHAR_TYPE *pend = pattern + bufp->used;
4560 #endif /* MBS_SUPPORT */
4561   US_CHAR_TYPE *p = pattern;
4562 
4563 #ifdef REL_ALLOC
4564   /* This holds the pointer to the failure stack, when
4565      it is allocated relocatably.  */
4566   fail_stack_elt_t *failure_stack_ptr;
4567 #endif
4568 
4569   /* Assume that each path through the pattern can be null until
4570      proven otherwise.  We set this false at the bottom of switch
4571      statement, to which we get only if a particular path doesn't
4572      match the empty string.  */
4573   boolean path_can_be_null = true;
4574 
4575   /* We aren't doing a `succeed_n' to begin with.  */
4576   boolean succeed_n_p = false;
4577 
4578   assert (fastmap != NULL && p != NULL);
4579 
4580   INIT_FAIL_STACK ();
4581   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4582   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4583   bufp->can_be_null = 0;
4584 
  /* Each iteration handles one opcode.  Cases that consume a character
     `break' out of the switch, which ends the current path; cases that
     match the empty string `continue' with the next opcode.  */
4585   while (1)
4586     {
4587       if (p == pend || *p == succeed)
4588 	{
4589 	  /* We have reached the (effective) end of pattern.  */
4590 	  if (!FAIL_STACK_EMPTY ())
4591 	    {
4592 	      bufp->can_be_null |= path_can_be_null;
4593 
4594 	      /* Reset for next path.  */
4595 	      path_can_be_null = true;
4596 
	      /* Resume at the most recently saved alternative.  */
4597 	      p = fail_stack.stack[--fail_stack.avail].pointer;
4598 
4599 	      continue;
4600 	    }
4601 	  else
4602 	    break;
4603 	}
4604 
4605       /* We should never be about to go beyond the end of the pattern.  */
4606       assert (p < pend);
4607 
4608       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4609 	{
4610 
4611         /* I guess the idea here is to simply not bother with a fastmap
4612            if a backreference is used, since it's too hard to figure out
4613            the fastmap for the corresponding group.  Setting
4614            `can_be_null' stops `re_search_2' from using the fastmap, so
4615            that is all we do.  */
4616 	case duplicate:
4617 	  bufp->can_be_null = 1;
4618           goto done;
4619 
4620 
4621       /* Following are the cases which match a character.  These end
4622          with `break'.  */
4623 
4624 #ifdef MBS_SUPPORT
4625 	case exactn:
	  /* Only the first byte of the first character is recorded.  */
4626           fastmap[truncate_wchar(p[1])] = 1;
4627 	  break;
4628 	case exactn_bin:
4629 	  fastmap[p[1]] = 1;
4630 	  break;
4631 #else
4632 	case exactn:
4633           fastmap[p[1]] = 1;
4634 	  break;
4635 #endif /* MBS_SUPPORT */
4636 
4637 
4638 #ifdef MBS_SUPPORT
4639         /* It is hard to distinguish fastmap from (multi byte) characters
4640            which depends on current locale.  */
4641         case charset:
4642 	case charset_not:
4643 	case wordchar:
4644 	case notwordchar:
4645           bufp->can_be_null = 1;
4646           goto done;
4647 #else
4648         case charset:
	  /* *p is the bitmap length in bytes; mark every set bit.  */
4649           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4650 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4651               fastmap[j] = 1;
4652 	  break;
4653 
4654 
4655 	case charset_not:
4656 	  /* Chars beyond end of map must be allowed.  */
4657 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4658             fastmap[j] = 1;
4659 
	  /* Within the map, mark every clear bit.  */
4660 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4661 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4662               fastmap[j] = 1;
4663           break;
4664 
4665 
4666 	case wordchar:
4667 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4668 	    if (SYNTAX (j) == Sword)
4669 	      fastmap[j] = 1;
4670 	  break;
4671 
4672 
4673 	case notwordchar:
4674 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4675 	    if (SYNTAX (j) != Sword)
4676 	      fastmap[j] = 1;
4677 	  break;
4678 #endif
4679 
4680         case anychar:
4681 	  {
	    /* Remember the newline entry so it can be restored below.  */
4682 	    int fastmap_newline = fastmap['\n'];
4683 
4684 	    /* `.' matches anything ...  */
4685 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4686 	      fastmap[j] = 1;
4687 
4688 	    /* ... except perhaps newline.  */
4689 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4690 	      fastmap['\n'] = fastmap_newline;
4691 
4692 	    /* Return if we have already set `can_be_null'; if we have,
4693 	       then the fastmap is irrelevant.  Something's wrong here.  */
	    /* Note that this `else' pairs with the RE_DOT_NEWLINE test
	       above, so the early exit is only taken when `.' matches
	       newline.  */
4694 	    else if (bufp->can_be_null)
4695 	      goto done;
4696 
4697 	    /* Otherwise, have to check alternative paths.  */
4698 	    break;
4699 	  }
4700 
4701 #ifdef emacs
4702         case syntaxspec:
4703 	  k = *p++;
4704 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4705 	    if (SYNTAX (j) == (enum syntaxcode) k)
4706 	      fastmap[j] = 1;
4707 	  break;
4708 
4709 
4710 	case notsyntaxspec:
4711 	  k = *p++;
4712 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4713 	    if (SYNTAX (j) != (enum syntaxcode) k)
4714 	      fastmap[j] = 1;
4715 	  break;
4716 
4717 
4718       /* All cases after this match the empty string.  These end with
4719          `continue'.  */
4720 
4721 
4722 	case before_dot:
4723 	case at_dot:
4724 	case after_dot:
4725           continue;
4726 #endif /* emacs */
4727 
4728 
4729         case no_op:
4730         case begline:
4731         case endline:
4732 	case begbuf:
4733 	case endbuf:
4734 	case wordbound:
4735 	case notwordbound:
4736 	case wordbeg:
4737 	case wordend:
4738         case push_dummy_failure:
4739           continue;
4740 
4741 
4742 	case jump_n:
4743         case pop_failure_jump:
4744 	case maybe_pop_jump:
4745 	case jump:
4746         case jump_past_alt:
4747 	case dummy_failure_jump:
4748           EXTRACT_NUMBER_AND_INCR (j, p);
4749 	  p += j;
4750 	  if (j > 0)
4751 	    continue;
4752 
4753           /* Jump backward implies we just went through the body of a
4754              loop and matched nothing.  Opcode jumped to should be
4755              `on_failure_jump' or `succeed_n'.  Just treat it like an
4756              ordinary jump.  For a * loop, it has pushed its failure
4757              point already; if so, discard that as redundant.  */
4758           if ((re_opcode_t) *p != on_failure_jump
4759 	      && (re_opcode_t) *p != succeed_n)
4760 	    continue;
4761 
4762           p++;
4763           EXTRACT_NUMBER_AND_INCR (j, p);
4764           p += j;
4765 
4766           /* If what's on the stack is where we are now, pop it.  */
4767           if (!FAIL_STACK_EMPTY ()
4768 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4769             fail_stack.avail--;
4770 
4771           continue;
4772 
4773 
4774         case on_failure_jump:
4775         case on_failure_keep_string_jump:
4776 	handle_on_failure_jump:
4777           EXTRACT_NUMBER_AND_INCR (j, p);
4778 
4779           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4780              end of the pattern.  We don't want to push such a point,
4781              since when we restore it above, entering the switch will
4782              increment `p' past the end of the pattern.  We don't need
4783              to push such a point since we obviously won't find any more
4784              fastmap entries beyond `pend'.  Such a pattern can match
4785              the null string, though.  */
4786           if (p + j < pend)
4787             {
4788               if (!PUSH_PATTERN_OP (p + j, fail_stack))
4789 		{
		  /* The push failed: release the stack and report an
		     internal error.  */
4790 		  RESET_FAIL_STACK ();
4791 		  return -2;
4792 		}
4793             }
4794           else
4795             bufp->can_be_null = 1;
4796 
4797           if (succeed_n_p)
4798             {
4799               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4800               succeed_n_p = false;
4801 	    }
4802 
4803           continue;
4804 
4805 
4806 	case succeed_n:
4807           /* Get to the number of times to succeed.  */
4808           p += OFFSET_ADDRESS_SIZE;
4809 
4810           /* Increment p past the n for when k != 0.  */
4811           EXTRACT_NUMBER_AND_INCR (k, p);
4812           if (k == 0)
4813 	    {
	      /* A zero count behaves like on_failure_jump: rewind to
		 the jump offset and share that code.  */
4814               p -= 2 * OFFSET_ADDRESS_SIZE;
4815   	      succeed_n_p = true;  /* Spaghetti code alert.  */
4816               goto handle_on_failure_jump;
4817             }
4818           continue;
4819 
4820 
4821 	case set_number_at:
4822           p += 2 * OFFSET_ADDRESS_SIZE;
4823           continue;
4824 
4825 
4826 	case start_memory:
4827         case stop_memory:
4828 	  p += 2;
4829 	  continue;
4830 
4831 
4832 	default:
4833           abort (); /* We have listed all the cases.  */
4834         } /* switch *p++ */
4835 
4836       /* Getting here means we have found the possible starting
4837          characters for one path of the pattern -- and that the empty
4838          string does not match.  We need not follow this path further.
4839          Instead, look at the next alternative (remembered on the
4840          stack), or quit if no more.  The test at the top of the loop
4841          does these things.  */
4842       path_can_be_null = false;
4843       p = pend;
4844     } /* while p */
4845 
4846   /* Set `can_be_null' for the last path (also the first path, if the
4847      pattern is empty).  */
4848   bufp->can_be_null |= path_can_be_null;
4849 
4850  done:
4851   RESET_FAIL_STACK ();
4852   return 0;
4853 } /* re_compile_fastmap */
4854 #ifdef _LIBC
4855 weak_alias (__re_compile_fastmap, re_compile_fastmap)
4856 #endif
4857 
4858 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4859    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4860    this memory for recording register information.  STARTS and ENDS
4861    must be allocated using the malloc library routine, and must each
4862    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4863 
4864    If NUM_REGS == 0, then subsequent matches should allocate their own
4865    register data.
4866 
4867    Unless this function is called, the first search or match using
4868    PATTERN_BUFFER will allocate its own register data, without
4869    freeing the old data.  */
4870 
4871 void
4872 re_set_registers (bufp, regs, num_regs, starts, ends)
4873     struct re_pattern_buffer *bufp;
4874     struct re_registers *regs;
4875     unsigned num_regs;
4876     regoff_t *starts, *ends;
4877 {
4878   if (num_regs)
4879     {
4880       bufp->regs_allocated = REGS_REALLOCATE;
4881       regs->num_regs = num_regs;
4882       regs->start = starts;
4883       regs->end = ends;
4884     }
4885   else
4886     {
4887       bufp->regs_allocated = REGS_UNALLOCATED;
4888       regs->num_regs = 0;
4889       regs->start = regs->end = (regoff_t *) 0;
4890     }
4891 }
4892 #ifdef _LIBC
4893 weak_alias (__re_set_registers, re_set_registers)
4894 #endif
4895 
4896 /* Searching routines.  */
4897 
/* Single-string front end to re_search_2.  STRING (of length SIZE) is
   passed as the second string with an empty first string, and SIZE is
   also used as the stop position, so the caller cannot choose where
   matching stops.  The return value is that of re_search_2.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  /* Matching may run up to the end of STRING.  */
  int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
		      regs, stop);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
4914 
4915 
4916 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4917    virtual concatenation of STRING1 and STRING2, starting first at index
4918    STARTPOS, then at STARTPOS + 1, and so on.
4919 
4920    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4921 
4922    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4923    only at STARTPOS; in general, the last start tried is STARTPOS +
4924    RANGE.
4925 
4926    In REGS, return the indices of the virtual concatenation of STRING1
4927    and STRING2 that matched the entire BUFP->buffer and its contained
4928    subexpressions.
4929 
4930    Do not consider matching one past the index STOP in the virtual
4931    concatenation of STRING1 and STRING2.
4932 
4933    We return either the position in the strings at which the match was
4934    found, -1 if no match, or -2 if error (such as failure
4935    stack overflow).  */
4936 
4937 int
4938 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
4939      struct re_pattern_buffer *bufp;
4940      const char *string1, *string2;
4941      int size1, size2;
4942      int startpos;
4943      int range;
4944      struct re_registers *regs;
4945      int stop;
4946 {
4947   int val;
4948   register char *fastmap = bufp->fastmap;
4949   register RE_TRANSLATE_TYPE translate = bufp->translate;
4950   int total_size = size1 + size2;
4951   int endpos = startpos + range;
4952 
4953   /* Check for out-of-range STARTPOS.  */
4954   if (startpos < 0 || startpos > total_size)
4955     return -1;
4956 
4957   /* Fix up RANGE if it might eventually take us outside
4958      the virtual concatenation of STRING1 and STRING2.
4959      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
4960   if (endpos < 0)
4961     range = 0 - startpos;
4962   else if (endpos > total_size)
4963     range = total_size - startpos;
4964 
4965   /* If the search isn't to be a backwards one, don't waste time in a
4966      search for a pattern that must be anchored.  */
4967   if (bufp->used > 0 && range > 0
4968       && ((re_opcode_t) bufp->buffer[0] == begbuf
4969 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
4970 	  || ((re_opcode_t) bufp->buffer[0] == begline
4971 	      && !bufp->newline_anchor)))
4972     {
4973       if (startpos > 0)
4974 	return -1;
4975       else
4976 	range = 1;
4977     }
4978 
4979 #ifdef emacs
4980   /* In a forward search for something that starts with \=.
4981      don't keep searching past point.  */
4982   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4983     {
4984       range = PT - startpos;
4985       if (range <= 0)
4986 	return -1;
4987     }
4988 #endif /* emacs */
4989 
4990   /* Update the fastmap now if not correct already.  */
4991   if (fastmap && !bufp->fastmap_accurate)
4992     if (re_compile_fastmap (bufp) == -2)
4993       return -2;
4994 
4995   /* Loop through the string, looking for a place to start matching.  */
4996   for (;;)
4997     {
4998       /* If a fastmap is supplied, skip quickly over characters that
4999          cannot be the start of a match.  If the pattern can match the
5000          null string, however, we don't need to skip characters; we want
5001          the first null string.  */
5002       if (fastmap && startpos < total_size && !bufp->can_be_null)
5003 	{
5004 	  if (range > 0)	/* Searching forwards.  */
5005 	    {
5006 	      register const char *d;
5007 	      register int lim = 0;
5008 	      int irange = range;
5009 
5010               if (startpos < size1 && startpos + range >= size1)
5011                 lim = range - (size1 - startpos);
5012 
5013 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5014 
5015               /* Written out as an if-else to avoid testing `translate'
5016                  inside the loop.  */
5017 	      if (translate)
5018                 while (range > lim
5019                        && !fastmap[(unsigned char)
5020 				   translate[(unsigned char) *d++]])
5021                   range--;
5022 	      else
5023                 while (range > lim && !fastmap[(unsigned char) *d++])
5024                   range--;
5025 
5026 	      startpos += irange - range;
5027 	    }
5028 	  else				/* Searching backwards.  */
5029 	    {
5030 	      register CHAR_TYPE c = (size1 == 0 || startpos >= size1
5031 				      ? string2[startpos - size1]
5032 				      : string1[startpos]);
5033 
5034 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5035 		goto advance;
5036 	    }
5037 	}
5038 
5039       /* If can't match the null string, and that's all we have left, fail.  */
5040       if (range >= 0 && startpos == total_size && fastmap
5041           && !bufp->can_be_null)
5042 	return -1;
5043 
5044       val = re_match_2_internal (bufp, string1, size1, string2, size2,
5045 				 startpos, regs, stop);
5046 #ifndef REGEX_MALLOC
5047 # ifdef C_ALLOCA
5048       alloca (0);
5049 # endif
5050 #endif
5051 
5052       if (val >= 0)
5053 	return startpos;
5054 
5055       if (val == -2)
5056 	return -2;
5057 
5058     advance:
5059       if (!range)
5060         break;
5061       else if (range > 0)
5062         {
5063           range--;
5064           startpos++;
5065         }
5066       else
5067         {
5068           range++;
5069           startpos--;
5070         }
5071     }
5072   return -1;
5073 } /* re_search_2 */
5074 #ifdef _LIBC
5075 weak_alias (__re_search_2, re_search_2)
5076 #endif
5077 
5078 #ifdef MBS_SUPPORT
5079 /* This converts PTR, a pointer into one of the search wchar_t strings
5080    `string1' and `string2', into a multibyte string offset from the
5081    beginning of that string.  We use mbs_offset to optimize.
5082    See convert_mbs_to_wcs.  */
/* The expansion refers to `string1', `string2', `mbs_offset1',
   `mbs_offset2' and `csize1', which must be in scope where the macro is
   used.  When the relevant mbs_offset table is NULL its contribution
   is 0; offsets into the second string have `csize1' added.  */
5083 # define POINTER_TO_OFFSET(ptr)						\
5084   (FIRST_STRING_P (ptr)							\
5085    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
5086    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
5087 		 + csize1)))
5088 #else
5089 /* This converts PTR, a pointer into one of the search strings `string1'
5090    and `string2', into an offset from the beginning of that string.  */
/* Offsets into the second string have `size1' added, so the result is
   an index into the virtual concatenation of the two strings.  */
5091 # define POINTER_TO_OFFSET(ptr)			\
5092   (FIRST_STRING_P (ptr)				\
5093    ? ((regoff_t) ((ptr) - string1))		\
5094    : ((regoff_t) ((ptr) - string2 + size1)))
5095 #endif /* MBS_SUPPORT */
5096 
5097 /* Macros for dealing with the split strings in re_match_2.  */
5098 
/* Nonzero while the current data end `dend' is `end_match_1'; both
   names must be in scope where the macro is used.  */
5099 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
5100 
5101 /* Call before fetching a character with *d.  This switches over to
5102    string2 if necessary.  */
/* Expects `d', `dend', `string2' and `end_match_2' in scope, together
   with a `fail' label, which is jumped to when the data is exhausted.
   A loop rather than a single test is used, so the end-of-data check
   is repeated after switching to string2.  */
5103 #define PREFETCH()							\
5104   while (d == dend)						    	\
5105     {									\
5106       /* End of string2 => fail.  */					\
5107       if (dend == end_match_2) 						\
5108         goto fail;							\
5109       /* End of string1 => advance to string2.  */ 			\
5110       d = string2;						        \
5111       dend = end_match_2;						\
5112     }
5113 
5114 
5115 /* Test if at very beginning or at very end of the virtual concatenation
5116    of `string1' and `string2'.  If only one string, it's `string2'.  */
/* Both macros read `size1', `size2', `string1', `string2' and `end2'
   from the enclosing scope.  Note that AT_STRINGS_BEG is also nonzero
   for any D whenever `size2' is 0.  */
5117 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
5118 #define AT_STRINGS_END(d) ((d) == end2)
5119 
5120 
5121 /* Test if D points to a character which is word-constituent.  We have
5122    two special cases to check for: if past the end of string1, look at
5123    the first character in string2; and if before the beginning of
5124    string2, look at the last character in string1.  */
/* D is evaluated more than once, so it must not have side effects.
   `end1' and `string2' are taken from the enclosing scope.  */
5125 #ifdef MBS_SUPPORT
5126 /* Use internationalized API instead of SYNTAX.  */
5127 # define WORDCHAR_P(d)							\
5128   (iswalnum ((wint_t)((d) == end1 ? *string2				\
5129            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0)
5130 #else
5131 # define WORDCHAR_P(d)							\
5132   (SYNTAX ((d) == end1 ? *string2					\
5133            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
5134    == Sword)
5135 #endif /* MBS_SUPPORT */
5136 
5137 /* Disabled due to a compiler bug -- see comment at case wordbound */
/* The definition below is compiled out by `#if 0'.  */
5138 #if 0
5139 /* Test if the character before D and the one at D differ with respect
5140    to being word-constituent.  The very beginning and the very end of
   the strings also count as boundaries.  */
5141 #define AT_WORD_BOUNDARY(d)						\
5142   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
5143    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
5144 #endif
5145 
/* Free everything we malloc.

   FREE_VAR (VAR) releases VAR if it is non-null and then sets it to
   NULL.  It is wrapped in `do { ... } while (0)' so that it expands to
   exactly one statement and can safely be the body of an unbraced
   `if' or `else'.  VAR is evaluated more than once and must be an
   lvalue without side effects.

   FREE_VARIABLES () releases the per-match work areas by name; those
   variables must be in scope where it is used.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    (var) = NULL;							\
  } while (0)
# ifdef MBS_SUPPORT
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    FREE_VAR (string1);							\
    FREE_VAR (string2);							\
    FREE_VAR (mbs_offset1);						\
    FREE_VAR (mbs_offset2);						\
  } while (0)
# else /* not MBS_SUPPORT */
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# endif /* MBS_SUPPORT */
#else
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      free (var);							\
    (var) = NULL;							\
  } while (0)
# ifdef MBS_SUPPORT
#  define FREE_VARIABLES()						\
  do {									\
    FREE_VAR (string1);							\
    FREE_VAR (string2);							\
    FREE_VAR (mbs_offset1);						\
    FREE_VAR (mbs_offset2);						\
  } while (0)
# else
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* MBS_SUPPORT */
#endif /* not MATCH_MAY_ALLOCATE */
5196 
5197 /* These values must meet several constraints.  They must not be valid
5198    register values; since we have a limit of 255 registers (because
5199    we use only one byte in the pattern for the register number), we can
5200    use numbers larger than 255.  They must differ by 1, because of
5201    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
5202    be larger than the value for the highest register, so we do not try
5203    to actually save any registers when none are active.  */
/* The two values are 1 << BYTEWIDTH and one more than that.  */
5204 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
5205 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5206 
5207 /* Matching routines.  */
5208 
#ifndef emacs   /* Emacs never uses this.  */
/* Single-string form of re_match_2: STRING (of length SIZE) is handed
   to re_match_2_internal as the second string with an empty first
   string, matching starts at POS and SIZE is used as the stop
   position.  The result of re_match_2_internal is returned as is.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int result;

  result = re_match_2_internal (bufp, NULL, 0, string, size,
				pos, regs, size);
# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* With the C emulation of alloca, a zero-size request is issued
     once the matcher has returned.  */
  alloca (0);
# endif
  return result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
5232 
/* Forward declarations of static helpers whose definitions are not in
   this part of the file.  The three *_null_string_p functions take a
   pattern position (by reference or by value), the pattern end and the
   register info array; bcmp_translate takes two strings, a length and
   a translation table.  */
5233 static boolean group_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
5234 						    US_CHAR_TYPE *end,
5235 						register_info_type *reg_info));
5236 static boolean alt_match_null_string_p _RE_ARGS ((US_CHAR_TYPE *p,
5237 						  US_CHAR_TYPE *end,
5238 						register_info_type *reg_info));
5239 static boolean common_op_match_null_string_p _RE_ARGS ((US_CHAR_TYPE **p,
5240 							US_CHAR_TYPE *end,
5241 						register_info_type *reg_info));
5242 static int bcmp_translate _RE_ARGS ((const CHAR_TYPE *s1, const CHAR_TYPE *s2,
5243 				     int len, char *translate));
5244 
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
   SIZE2, respectively).  Matching starts at POS and stops at STOP.

   REGS is handed on to re_match_2_internal, which does the actual
   work; whether and how the substring offsets of the groups are stored
   there is decided by that function.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int result;

  result = re_match_2_internal (bufp, string1, size1, string2, size2,
				pos, regs, stop);
#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* With the C emulation of alloca, a zero-size request is issued once
     the matcher has returned.  */
  alloca (0);
#endif
  return result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5279 
5280 #ifdef MBS_SUPPORT
5281 
5282 static int count_mbs_length PARAMS ((int *, int));
5283 
/* Convert LENGTH, a byte count measured from the start of a multibyte
   string, into the number of wide characters that prefix occupies.
   OFFSET_BUFFER is the offset table belonging to that string (see
   convert_mbs_to_wcs); it is scanned from index 0 up to its -1
   sentinel, and the index of the entry equal to LENGTH is the answer.

   Returns -1 when LENGTH is negative, when an entry larger than LENGTH
   is met first (LENGTH falls inside a wide character), or when the
   sentinel is reached without a hit.  Returns 0 when OFFSET_BUFFER is
   NULL and LENGTH is not negative.  */

static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int idx = 0;

  /* A negative byte count is never valid; this is checked before the
     NULL test, so it wins even when there is no table.  */
  if (length < 0)
    return -1;

  if (!offset_buffer)
    return 0;

  while (offset_buffer[idx] != -1)
    {
      int offset = offset_buffer[idx];

      /* The first entry that reaches LENGTH settles the question:
	 an exact hit is a character boundary, anything beyond it
	 means LENGTH is a fragment of a wide character.  */
      if (offset >= length)
	return offset == length ? idx : -1;

      idx++;
    }

  /* Ran into the sentinel without finding LENGTH.  */
  return -1;
}
5315 #endif /* MBS_SUPPORT */
5316 
5317 /* This is a separate function so that we can force an alloca cleanup
5318    afterwards.  */
5319 static int
5320 #ifdef MBS_SUPPORT
5321 re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos, regs, stop)
5322      struct re_pattern_buffer *bufp;
5323      const char *cstring1, *cstring2;
5324      int csize1, csize2;
5325 #else
5326 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5327      struct re_pattern_buffer *bufp;
5328      const char *string1, *string2;
5329      int size1, size2;
5330 #endif
5331      int pos;
5332      struct re_registers *regs;
5333      int stop;
5334 {
5335   /* General temporaries.  */
5336   int mcnt;
5337   US_CHAR_TYPE *p1;
5338 #ifdef MBS_SUPPORT
5339   /* We need wchar_t* buffers correspond to string1, string2.  */
5340   CHAR_TYPE *string1 = NULL, *string2 = NULL;
5341   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5342   int size1 = 0, size2 = 0;
5343   /* offset buffer for optimization. See convert_mbs_to_wcs.  */
5344   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5345   /* They hold whether each wchar_t is binary data or not.  */
5346   char *is_binary = NULL;
5347 #endif /* MBS_SUPPORT */
5348 
5349   /* Just past the end of the corresponding string.  */
5350   const CHAR_TYPE *end1, *end2;
5351 
5352   /* Pointers into string1 and string2, just past the last characters in
5353      each to consider matching.  */
5354   const CHAR_TYPE *end_match_1, *end_match_2;
5355 
5356   /* Where we are in the data, and the end of the current string.  */
5357   const CHAR_TYPE *d, *dend;
5358 
5359   /* Where we are in the pattern, and the end of the pattern.  */
5360 #ifdef MBS_SUPPORT
5361   US_CHAR_TYPE *pattern, *p;
5362   register US_CHAR_TYPE *pend;
5363 #else
5364   US_CHAR_TYPE *p = bufp->buffer;
5365   register US_CHAR_TYPE *pend = p + bufp->used;
5366 #endif /* MBS_SUPPORT */
5367 
5368   /* Mark the opcode just after a start_memory, so we can test for an
5369      empty subpattern when we get to the stop_memory.  */
5370   US_CHAR_TYPE *just_past_start_mem = 0;
5371 
5372   /* We use this to map every character in the string.  */
5373   RE_TRANSLATE_TYPE translate = bufp->translate;
5374 
5375   /* Failure point stack.  Each place that can handle a failure further
5376      down the line pushes a failure point on this stack.  It consists of
5377      restart, regend, and reg_info for all registers corresponding to
5378      the subexpressions we're currently inside, plus the number of such
5379      registers, and, finally, two char *'s.  The first char * is where
5380      to resume scanning the pattern; the second one is where to resume
5381      scanning the strings.  If the latter is zero, the failure point is
5382      a ``dummy''; if a failure happens and the failure point is a dummy,
5383      it gets discarded and the next one is tried.  */
5384 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5385   fail_stack_type fail_stack;
5386 #endif
5387 #ifdef DEBUG
5388   static unsigned failure_id;
5389   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5390 #endif
5391 
5392 #ifdef REL_ALLOC
5393   /* This holds the pointer to the failure stack, when
5394      it is allocated relocatably.  */
5395   fail_stack_elt_t *failure_stack_ptr;
5396 #endif
5397 
5398   /* We fill all the registers internally, independent of what we
5399      return, for use in backreferences.  The number here includes
5400      an element for register zero.  */
5401   size_t num_regs = bufp->re_nsub + 1;
5402 
5403   /* The currently active registers.  */
5404   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5405   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5406 
5407   /* Information on the contents of registers. These are pointers into
5408      the input strings; they record just what was matched (on this
5409      attempt) by a subexpression part of the pattern, that is, the
5410      regnum-th regstart pointer points to where in the pattern we began
5411      matching and the regnum-th regend points to right after where we
5412      stopped matching the regnum-th subexpression.  (The zeroth register
5413      keeps track of what the whole pattern matches.)  */
5414 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5415   const CHAR_TYPE **regstart, **regend;
5416 #endif
5417 
5418   /* If a group that's operated upon by a repetition operator fails to
5419      match anything, then the register for its start will need to be
5420      restored because it will have been set to wherever in the string we
5421      are when we last see its open-group operator.  Similarly for a
5422      register's end.  */
5423 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5424   const CHAR_TYPE **old_regstart, **old_regend;
5425 #endif
5426 
5427   /* The is_active field of reg_info helps us keep track of which (possibly
5428      nested) subexpressions we are currently in. The matched_something
5429      field of reg_info[reg_num] helps us tell whether or not we have
5430      matched any of the pattern so far this time through the reg_num-th
5431      subexpression.  These two fields get reset each time through any
5432      loop their register is in.  */
5433 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5434   register_info_type *reg_info;
5435 #endif
5436 
5437   /* The following record the register info as found in the above
5438      variables when we find a match better than any we've seen before.
5439      This happens as we backtrack through the failure points, which in
5440      turn happens only if we have not yet matched the entire string. */
5441   unsigned best_regs_set = false;
5442 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5443   const CHAR_TYPE **best_regstart, **best_regend;
5444 #endif
5445 
5446   /* Logically, this is `best_regend[0]'.  But we don't want to have to
5447      allocate space for that if we're not allocating space for anything
5448      else (see below).  Also, we never need info about register 0 for
5449      any of the other register vectors, and it seems rather a kludge to
5450      treat `best_regend' differently than the rest.  So we keep track of
5451      the end of the best match so far in a separate variable.  We
5452      initialize this to NULL so that when we backtrack the first time
5453      and need to test it, it's not garbage.  */
5454   const CHAR_TYPE *match_end = NULL;
5455 
5456   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5457   int set_regs_matched_done = 0;
5458 
5459   /* Used when we pop values we don't care about.  */
5460 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5461   const CHAR_TYPE **reg_dummy;
5462   register_info_type *reg_info_dummy;
5463 #endif
5464 
5465 #ifdef DEBUG
5466   /* Counts the total number of registers pushed.  */
5467   unsigned num_regs_pushed = 0;
5468 #endif
5469 
5470   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5471 
5472   INIT_FAIL_STACK ();
5473 
5474 #ifdef MATCH_MAY_ALLOCATE
5475   /* Do not bother to initialize all the register variables if there are
5476      no groups in the pattern, as it takes a fair amount of time.  If
5477      there are groups, we include space for register 0 (the whole
5478      pattern), even though we never use it, since it simplifies the
5479      array indexing.  We should fix this.  */
5480   if (bufp->re_nsub)
5481     {
5482       regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5483       regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5484       old_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5485       old_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5486       best_regstart = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5487       best_regend = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5488       reg_info = REGEX_TALLOC (num_regs, register_info_type);
5489       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_TYPE *);
5490       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
5491 
5492       if (!(regstart && regend && old_regstart && old_regend && reg_info
5493             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5494         {
5495           FREE_VARIABLES ();
5496           return -2;
5497         }
5498     }
5499   else
5500     {
5501       /* We must initialize all our variables to NULL, so that
5502          `FREE_VARIABLES' doesn't try to free them.  */
5503       regstart = regend = old_regstart = old_regend = best_regstart
5504         = best_regend = reg_dummy = NULL;
5505       reg_info = reg_info_dummy = (register_info_type *) NULL;
5506     }
5507 #endif /* MATCH_MAY_ALLOCATE */
5508 
5509   /* The starting position is bogus.  */
5510 #ifdef MBS_SUPPORT
5511   if (pos < 0 || pos > csize1 + csize2)
5512 #else
5513   if (pos < 0 || pos > size1 + size2)
5514 #endif
5515     {
5516       FREE_VARIABLES ();
5517       return -1;
5518     }
5519 
5520 #ifdef MBS_SUPPORT
5521   /* Allocate wchar_t array for string1 and string2 and
5522      fill them with converted string.  */
5523   if (csize1 != 0)
5524     {
5525       string1 = REGEX_TALLOC (csize1 + 1, CHAR_TYPE);
5526       mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5527       is_binary = REGEX_TALLOC (csize1 + 1, char);
5528       if (!string1 || !mbs_offset1 || !is_binary)
5529 	{
5530 	  FREE_VAR (string1);
5531 	  FREE_VAR (mbs_offset1);
5532 	  FREE_VAR (is_binary);
5533 	  return -2;
5534 	}
5535       size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
5536 				 mbs_offset1, is_binary);
5537       string1[size1] = L'\0'; /* for a sentinel  */
5538       FREE_VAR (is_binary);
5539     }
5540   if (csize2 != 0)
5541     {
5542       string2 = REGEX_TALLOC (csize2 + 1, CHAR_TYPE);
5543       mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5544       is_binary = REGEX_TALLOC (csize2 + 1, char);
5545       if (!string2 || !mbs_offset2 || !is_binary)
5546 	{
5547 	  FREE_VAR (string1);
5548 	  FREE_VAR (mbs_offset1);
5549 	  FREE_VAR (string2);
5550 	  FREE_VAR (mbs_offset2);
5551 	  FREE_VAR (is_binary);
5552 	  return -2;
5553 	}
5554       size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5555 				 mbs_offset2, is_binary);
5556       string2[size2] = L'\0'; /* for a sentinel  */
5557       FREE_VAR (is_binary);
5558     }
5559 
5560   /* We need to cast pattern to (wchar_t*), because we casted this compiled
5561      pattern to (char*) in regex_compile.  */
5562   p = pattern = (CHAR_TYPE*)bufp->buffer;
5563   pend = (CHAR_TYPE*)(bufp->buffer + bufp->used);
5564 
5565 #endif /* MBS_SUPPORT */
5566 
5567   /* Initialize subexpression text positions to -1 to mark ones that no
5568      start_memory/stop_memory has been seen for. Also initialize the
5569      register information struct.  */
5570   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5571     {
5572       regstart[mcnt] = regend[mcnt]
5573         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5574 
5575       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5576       IS_ACTIVE (reg_info[mcnt]) = 0;
5577       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5578       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5579     }
5580 
5581   /* We move `string1' into `string2' if the latter's empty -- but not if
5582      `string1' is null.  */
5583   if (size2 == 0 && string1 != NULL)
5584     {
5585       string2 = string1;
5586       size2 = size1;
5587       string1 = 0;
5588       size1 = 0;
5589     }
5590   end1 = string1 + size1;
5591   end2 = string2 + size2;
5592 
5593   /* Compute where to stop matching, within the two strings.  */
5594 #ifdef MBS_SUPPORT
5595   if (stop <= csize1)
5596     {
5597       mcnt = count_mbs_length(mbs_offset1, stop);
5598       end_match_1 = string1 + mcnt;
5599       end_match_2 = string2;
5600     }
5601   else
5602     {
5603       end_match_1 = end1;
5604       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5605       end_match_2 = string2 + mcnt;
5606     }
5607   if (mcnt < 0)
5608     { /* count_mbs_length return error.  */
5609       FREE_VARIABLES ();
5610       return -1;
5611     }
5612 #else
5613   if (stop <= size1)
5614     {
5615       end_match_1 = string1 + stop;
5616       end_match_2 = string2;
5617     }
5618   else
5619     {
5620       end_match_1 = end1;
5621       end_match_2 = string2 + stop - size1;
5622     }
5623 #endif /* MBS_SUPPORT */
5624 
5625   /* `p' scans through the pattern as `d' scans through the data.
5626      `dend' is the end of the input string that `d' points within.  `d'
5627      is advanced into the following input string whenever necessary, but
5628      this happens before fetching; therefore, at the beginning of the
5629      loop, `d' can be pointing at the end of a string, but it cannot
5630      equal `string2'.  */
5631 #ifdef MBS_SUPPORT
5632   if (size1 > 0 && pos <= csize1)
5633     {
5634       mcnt = count_mbs_length(mbs_offset1, pos);
5635       d = string1 + mcnt;
5636       dend = end_match_1;
5637     }
5638   else
5639     {
5640       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
5641       d = string2 + mcnt;
5642       dend = end_match_2;
5643     }
5644 
5645   if (mcnt < 0)
5646     { /* count_mbs_length return error.  */
5647       FREE_VARIABLES ();
5648       return -1;
5649     }
5650 #else
5651   if (size1 > 0 && pos <= size1)
5652     {
5653       d = string1 + pos;
5654       dend = end_match_1;
5655     }
5656   else
5657     {
5658       d = string2 + pos - size1;
5659       dend = end_match_2;
5660     }
5661 #endif /* MBS_SUPPORT */
5662 
5663   DEBUG_PRINT1 ("The compiled pattern is:\n");
5664   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5665   DEBUG_PRINT1 ("The string to match is: `");
5666   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5667   DEBUG_PRINT1 ("'\n");
5668 
5669   /* This loops over pattern commands.  It exits by returning from the
5670      function if the match is complete, or it drops through if the match
5671      fails at this starting point in the input data.  */
5672   for (;;)
5673     {
5674 #ifdef _LIBC
5675       DEBUG_PRINT2 ("\n%p: ", p);
5676 #else
5677       DEBUG_PRINT2 ("\n0x%x: ", p);
5678 #endif
5679 
5680       if (p == pend)
5681 	{ /* End of pattern means we might have succeeded.  */
5682           DEBUG_PRINT1 ("end of pattern ... ");
5683 
5684 	  /* If we haven't matched the entire string, and we want the
5685              longest match, try backtracking.  */
5686           if (d != end_match_2)
5687 	    {
5688 	      /* 1 if this match ends in the same string (string1 or string2)
5689 		 as the best previous match.  */
5690 	      boolean same_str_p = (FIRST_STRING_P (match_end)
5691 				    == MATCHING_IN_FIRST_STRING);
5692 	      /* 1 if this match is the best seen so far.  */
5693 	      boolean best_match_p;
5694 
5695 	      /* AIX compiler got confused when this was combined
5696 		 with the previous declaration.  */
5697 	      if (same_str_p)
5698 		best_match_p = d > match_end;
5699 	      else
5700 		best_match_p = !MATCHING_IN_FIRST_STRING;
5701 
5702               DEBUG_PRINT1 ("backtracking.\n");
5703 
5704               if (!FAIL_STACK_EMPTY ())
5705                 { /* More failure points to try.  */
5706 
5707                   /* If exceeds best match so far, save it.  */
5708                   if (!best_regs_set || best_match_p)
5709                     {
5710                       best_regs_set = true;
5711                       match_end = d;
5712 
5713                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5714 
5715                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5716                         {
5717                           best_regstart[mcnt] = regstart[mcnt];
5718                           best_regend[mcnt] = regend[mcnt];
5719                         }
5720                     }
5721                   goto fail;
5722                 }
5723 
5724               /* If no failure points, don't restore garbage.  And if
5725                  last match is real best match, don't restore second
5726                  best one. */
5727               else if (best_regs_set && !best_match_p)
5728                 {
5729   	        restore_best_regs:
5730                   /* Restore best match.  It may happen that `dend ==
5731                      end_match_1' while the restored d is in string2.
5732                      For example, the pattern `x.*y.*z' against the
5733                      strings `x-' and `y-z-', if the two strings are
5734                      not consecutive in memory.  */
5735                   DEBUG_PRINT1 ("Restoring best registers.\n");
5736 
5737                   d = match_end;
5738                   dend = ((d >= string1 && d <= end1)
5739 		           ? end_match_1 : end_match_2);
5740 
5741 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5742 		    {
5743 		      regstart[mcnt] = best_regstart[mcnt];
5744 		      regend[mcnt] = best_regend[mcnt];
5745 		    }
5746                 }
5747             } /* d != end_match_2 */
5748 
5749 	succeed_label:
5750           DEBUG_PRINT1 ("Accepting match.\n");
5751           /* If caller wants register contents data back, do it.  */
5752           if (regs && !bufp->no_sub)
5753 	    {
5754 	      /* Have the register data arrays been allocated?  */
5755               if (bufp->regs_allocated == REGS_UNALLOCATED)
5756                 { /* No.  So allocate them with malloc.  We need one
5757                      extra element beyond `num_regs' for the `-1' marker
5758                      GNU code uses.  */
5759                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5760                   regs->start = TALLOC (regs->num_regs, regoff_t);
5761                   regs->end = TALLOC (regs->num_regs, regoff_t);
5762                   if (regs->start == NULL || regs->end == NULL)
5763 		    {
5764 		      FREE_VARIABLES ();
5765 		      return -2;
5766 		    }
5767                   bufp->regs_allocated = REGS_REALLOCATE;
5768                 }
5769               else if (bufp->regs_allocated == REGS_REALLOCATE)
5770                 { /* Yes.  If we need more elements than were already
5771                      allocated, reallocate them.  If we need fewer, just
5772                      leave it alone.  */
5773                   if (regs->num_regs < num_regs + 1)
5774                     {
5775                       regs->num_regs = num_regs + 1;
5776                       RETALLOC (regs->start, regs->num_regs, regoff_t);
5777                       RETALLOC (regs->end, regs->num_regs, regoff_t);
5778                       if (regs->start == NULL || regs->end == NULL)
5779 			{
5780 			  FREE_VARIABLES ();
5781 			  return -2;
5782 			}
5783                     }
5784                 }
5785               else
5786 		{
5787 		  /* These braces fend off a "empty body in an else-statement"
5788 		     warning under GCC when assert expands to nothing.  */
5789 		  assert (bufp->regs_allocated == REGS_FIXED);
5790 		}
5791 
5792               /* Convert the pointer data in `regstart' and `regend' to
5793                  indices.  Register zero has to be set differently,
5794                  since we haven't kept track of any info for it.  */
5795               if (regs->num_regs > 0)
5796                 {
5797                   regs->start[0] = pos;
5798 #ifdef MBS_SUPPORT
5799 		  if (MATCHING_IN_FIRST_STRING)
5800 		    regs->end[0] = mbs_offset1 != NULL ?
5801 					mbs_offset1[d-string1] : 0;
5802 		  else
5803 		    regs->end[0] = csize1 + (mbs_offset2 != NULL ?
5804 					     mbs_offset2[d-string2] : 0);
5805 #else
5806                   regs->end[0] = (MATCHING_IN_FIRST_STRING
5807 				  ? ((regoff_t) (d - string1))
5808 			          : ((regoff_t) (d - string2 + size1)));
5809 #endif /* MBS_SUPPORT */
5810                 }
5811 
5812               /* Go through the first `min (num_regs, regs->num_regs)'
5813                  registers, since that is all we initialized.  */
5814 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
5815 		   mcnt++)
5816 		{
5817                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
5818                     regs->start[mcnt] = regs->end[mcnt] = -1;
5819                   else
5820                     {
5821 		      regs->start[mcnt]
5822 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
5823                       regs->end[mcnt]
5824 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
5825                     }
5826 		}
5827 
5828               /* If the regs structure we return has more elements than
5829                  were in the pattern, set the extra elements to -1.  If
5830                  we (re)allocated the registers, this is the case,
5831                  because we always allocate enough to have at least one
5832                  -1 at the end.  */
5833               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
5834                 regs->start[mcnt] = regs->end[mcnt] = -1;
5835 	    } /* regs && !bufp->no_sub */
5836 
5837           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
5838                         nfailure_points_pushed, nfailure_points_popped,
5839                         nfailure_points_pushed - nfailure_points_popped);
5840           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
5841 
5842 #ifdef MBS_SUPPORT
5843 	  if (MATCHING_IN_FIRST_STRING)
5844 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
5845 	  else
5846 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
5847 			csize1;
5848           mcnt -= pos;
5849 #else
5850           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
5851 			    ? string1
5852 			    : string2 - size1);
5853 #endif /* MBS_SUPPORT */
5854 
5855           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
5856 
5857           FREE_VARIABLES ();
5858           return mcnt;
5859         }
5860 
5861       /* Otherwise match next pattern command.  */
5862       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
5863 	{
5864         /* Ignore these.  Used to ignore the n of succeed_n's which
5865            currently have n == 0.  */
5866         case no_op:
5867           DEBUG_PRINT1 ("EXECUTING no_op.\n");
5868           break;
5869 
5870 	case succeed:
5871           DEBUG_PRINT1 ("EXECUTING succeed.\n");
5872 	  goto succeed_label;
5873 
5874         /* Match the next n pattern characters exactly.  The following
5875            byte in the pattern defines n, and the n bytes after that
5876            are the characters to match.  */
5877 	case exactn:
5878 #ifdef MBS_SUPPORT
5879 	case exactn_bin:
5880 #endif
5881 	  mcnt = *p++;
5882           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
5883 
5884           /* This is written out as an if-else so we don't waste time
5885              testing `translate' inside the loop.  */
5886           if (translate)
5887 	    {
5888 	      do
5889 		{
5890 		  PREFETCH ();
5891 #ifdef MBS_SUPPORT
5892 		  if (*d <= 0xff)
5893 		    {
5894 		      if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
5895 			  != (US_CHAR_TYPE) *p++)
5896 			goto fail;
5897 		    }
5898 		  else
5899 		    {
5900 		      if (*d++ != (CHAR_TYPE) *p++)
5901 			goto fail;
5902 		    }
5903 #else
5904 		  if ((US_CHAR_TYPE) translate[(unsigned char) *d++]
5905 		      != (US_CHAR_TYPE) *p++)
5906                     goto fail;
5907 #endif /* MBS_SUPPORT */
5908 		}
5909 	      while (--mcnt);
5910 	    }
5911 	  else
5912 	    {
5913 	      do
5914 		{
5915 		  PREFETCH ();
5916 		  if (*d++ != (CHAR_TYPE) *p++) goto fail;
5917 		}
5918 	      while (--mcnt);
5919 	    }
5920 	  SET_REGS_MATCHED ();
5921           break;
5922 
5923 
5924         /* Match any character except possibly a newline or a null.  */
5925 	case anychar:
5926           DEBUG_PRINT1 ("EXECUTING anychar.\n");
5927 
5928           PREFETCH ();
5929 
5930           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
5931               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
5932 	    goto fail;
5933 
5934           SET_REGS_MATCHED ();
5935           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
5936           d++;
5937 	  break;
5938 
5939 
5940 	case charset:
5941 	case charset_not:
5942 	  {
5943 	    register US_CHAR_TYPE c;
5944 #ifdef MBS_SUPPORT
5945 	    unsigned int i, char_class_length, coll_symbol_length,
5946               equiv_class_length, ranges_length, chars_length, length;
5947 	    CHAR_TYPE *workp, *workp2, *charset_top;
5948 #define WORK_BUFFER_SIZE 128
5949             CHAR_TYPE str_buf[WORK_BUFFER_SIZE];
5950 # ifdef _LIBC
5951 	    uint32_t nrules;
5952 # endif /* _LIBC */
5953 #endif /* MBS_SUPPORT */
5954 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
5955 
5956             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
5957 	    PREFETCH ();
5958 	    c = TRANSLATE (*d); /* The character to match.  */
5959 #ifdef MBS_SUPPORT
5960 # ifdef _LIBC
5961 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
5962 # endif /* _LIBC */
5963 	    charset_top = p - 1;
5964 	    char_class_length = *p++;
5965 	    coll_symbol_length = *p++;
5966 	    equiv_class_length = *p++;
5967 	    ranges_length = *p++;
5968 	    chars_length = *p++;
5969 	    /* p points charset[6], so the address of the next instruction
5970 	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
5971 	       where l=length of char_classes, m=length of collating_symbol,
5972 	       n=equivalence_class, o=length of char_range,
5973 	       p'=length of character.  */
5974 	    workp = p;
5975 	    /* Update p to indicate the next instruction.  */
5976 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
5977               2*ranges_length + chars_length;
5978 
5979             /* match with char_class?  */
5980 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
5981 	      {
5982 		wctype_t wctype;
5983 		uintptr_t alignedp = ((uintptr_t)workp
5984 				      + __alignof__(wctype_t) - 1)
5985 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
5986 		wctype = *((wctype_t*)alignedp);
5987 		workp += CHAR_CLASS_SIZE;
5988 		if (iswctype((wint_t)c, wctype))
5989 		  goto char_set_matched;
5990 	      }
5991 
5992             /* match with collating_symbol?  */
5993 # ifdef _LIBC
5994 	    if (nrules != 0)
5995 	      {
5996 		const unsigned char *extra = (const unsigned char *)
5997 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
5998 
5999 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6000 		     workp++)
6001 		  {
6002 		    int32_t *wextra;
6003 		    wextra = (int32_t*)(extra + *workp++);
6004 		    for (i = 0; i < *wextra; ++i)
6005 		      if (TRANSLATE(d[i]) != wextra[1 + i])
6006 			break;
6007 
6008 		    if (i == *wextra)
6009 		      {
6010 			/* Update d, however d will be incremented at
6011 			   char_set_matched:, we decrement d here.  */
6012 			d += i - 1;
6013 			goto char_set_matched;
6014 		      }
6015 		  }
6016 	      }
6017 	    else /* (nrules == 0) */
6018 # endif
6019 	      /* If we can't look up collation data, we use wcscoll
6020 		 instead.  */
6021 	      {
6022 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6023 		  {
6024 		    const CHAR_TYPE *backup_d = d, *backup_dend = dend;
6025 		    length = wcslen(workp);
6026 
6027 		    /* If wcscoll(the collating symbol, whole string) > 0,
6028 		       any substring of the string never match with the
6029 		       collating symbol.  */
6030 		    if (wcscoll(workp, d) > 0)
6031 		      {
6032 			workp += length + 1;
6033 			continue;
6034 		      }
6035 
6036 		    /* First, we compare the collating symbol with
6037 		       the first character of the string.
6038 		       If it don't match, we add the next character to
6039 		       the compare buffer in turn.  */
6040 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6041 		      {
6042 			int match;
6043 			if (d == dend)
6044 			  {
6045 			    if (dend == end_match_2)
6046 			      break;
6047 			    d = string2;
6048 			    dend = end_match_2;
6049 			  }
6050 
6051 			/* add next character to the compare buffer.  */
6052 			str_buf[i] = TRANSLATE(*d);
6053 			str_buf[i+1] = '\0';
6054 
6055 			match = wcscoll(workp, str_buf);
6056 			if (match == 0)
6057 			  goto char_set_matched;
6058 
6059 			if (match < 0)
6060 			  /* (str_buf > workp) indicate (str_buf + X > workp),
6061 			     because for all X (str_buf + X > str_buf).
6062 			     So we don't need continue this loop.  */
6063 			  break;
6064 
6065 			/* Otherwise(str_buf < workp),
6066 			   (str_buf+next_character) may equals (workp).
6067 			   So we continue this loop.  */
6068 		      }
6069 		    /* not matched */
6070 		    d = backup_d;
6071 		    dend = backup_dend;
6072 		    workp += length + 1;
6073 		  }
6074               }
6075             /* match with equivalence_class?  */
6076 # ifdef _LIBC
6077 	    if (nrules != 0)
6078 	      {
6079                 const CHAR_TYPE *backup_d = d, *backup_dend = dend;
6080 		/* Try to match the equivalence class against
6081 		   those known to the collate implementation.  */
6082 		const int32_t *table;
6083 		const int32_t *weights;
6084 		const int32_t *extra;
6085 		const int32_t *indirect;
6086 		int32_t idx, idx2;
6087 		wint_t *cp;
6088 		size_t len;
6089 
6090 		/* This #include defines a local function!  */
6091 #  include <locale/weightwc.h>
6092 
6093 		table = (const int32_t *)
6094 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6095 		weights = (const wint_t *)
6096 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6097 		extra = (const wint_t *)
6098 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6099 		indirect = (const int32_t *)
6100 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6101 
6102 		/* Write 1 collating element to str_buf, and
6103 		   get its index.  */
6104 		idx2 = 0;
6105 
6106 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6107 		  {
6108 		    cp = (wint_t*)str_buf;
6109 		    if (d == dend)
6110 		      {
6111 			if (dend == end_match_2)
6112 			  break;
6113 			d = string2;
6114 			dend = end_match_2;
6115 		      }
6116 		    str_buf[i] = TRANSLATE(*(d+i));
6117 		    str_buf[i+1] = '\0'; /* sentinel */
6118 		    idx2 = findidx ((const wint_t**)&cp);
6119 		  }
6120 
6121 		/* Update d.  Since d will be incremented at
6122 		   char_set_matched:, we subtract one here.  */
6123 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6124 		if (d >= dend)
6125 		  {
6126 		    if (dend == end_match_2)
6127 			d = dend;
6128 		    else
6129 		      {
6130 			d = string2;
6131 			dend = end_match_2;
6132 		      }
6133 		  }
6134 
6135 		len = weights[idx2];
6136 
6137 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6138 		     workp++)
6139 		  {
6140 		    idx = (int32_t)*workp;
6141 		    /* We already checked idx != 0 in regex_compile. */
6142 
6143 		    if (idx2 != 0 && len == weights[idx])
6144 		      {
6145 			int cnt = 0;
6146 			while (cnt < len && (weights[idx + 1 + cnt]
6147 					     == weights[idx2 + 1 + cnt]))
6148 			  ++cnt;
6149 
6150 			if (cnt == len)
6151 			  goto char_set_matched;
6152 		      }
6153 		  }
6154 		/* not matched */
6155                 d = backup_d;
6156                 dend = backup_dend;
6157 	      }
6158 	    else /* (nrules == 0) */
6159 # endif
6160 	      /* If we can't look up collation data, we use wcscoll
6161 		 instead.  */
6162 	      {
6163 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6164 		  {
6165 		    const CHAR_TYPE *backup_d = d, *backup_dend = dend;
6166 		    length = wcslen(workp);
6167 
6168 		    /* If wcscoll(the equivalence class string, whole string) > 0,
6169 		       no prefix of the string can ever match the
6170 		       equivalence class string.  */
6171 		    if (wcscoll(workp, d) > 0)
6172 		      {
6173 			workp += length + 1;
6174 			break;
6175 		      }
6176 
6177 		    /* First, we compare the equivalence class with
6178 		       the first character of the string.
6179 		       If it doesn't match, we add the next character to
6180 		       the compare buffer in turn.  */
6181 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6182 		      {
6183 			int match;
6184 			if (d == dend)
6185 			  {
6186 			    if (dend == end_match_2)
6187 			      break;
6188 			    d = string2;
6189 			    dend = end_match_2;
6190 			  }
6191 
6192 			/* add next character to the compare buffer.  */
6193 			str_buf[i] = TRANSLATE(*d);
6194 			str_buf[i+1] = '\0';
6195 
6196 			match = wcscoll(workp, str_buf);
6197 
6198 			if (match == 0)
6199 			  goto char_set_matched;
6200 
6201 			if (match < 0)
6202 			/* (str_buf > workp) implies (str_buf + X > workp),
6203 			   because for all X, (str_buf + X > str_buf).
6204 			   So we don't need to continue this loop.  */
6205 			  break;
6206 
6207 			/* Otherwise (str_buf < workp),
6208 			   (str_buf + next_character) may equal (workp).
6209 			   So we continue this loop.  */
6210 		      }
6211 		    /* not matched */
6212 		    d = backup_d;
6213 		    dend = backup_dend;
6214 		    workp += length + 1;
6215 		  }
6216 	      }
6217 
6218             /* match with char_range?  */
6219 #ifdef _LIBC
6220 	    if (nrules != 0)
6221 	      {
6222 		uint32_t collseqval;
6223 		const char *collseq = (const char *)
6224 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6225 
6226 		collseqval = collseq_table_lookup (collseq, c);
6227 
6228 		for (; workp < p - chars_length ;)
6229 		  {
6230 		    uint32_t start_val, end_val;
6231 
6232 		    /* We have already computed the collation sequence values
6233 		       of the characters (or collating symbols).  */
6234 		    start_val = (uint32_t) *workp++; /* range_start */
6235 		    end_val = (uint32_t) *workp++; /* range_end */
6236 
6237 		    if (start_val <= collseqval && collseqval <= end_val)
6238 		      goto char_set_matched;
6239 		  }
6240 	      }
6241 	    else
6242 #endif
6243 	      {
6244 		/* We put range_start_char at str_buf[0], range_end_char
6245 		   at str_buf[4], and the character being compared at
		   str_buf[2].  */
6246 		str_buf[1] = 0;
6247 		str_buf[2] = c;
6248 		str_buf[3] = 0;
6249 		str_buf[5] = 0;
6250 		for (; workp < p - chars_length ;)
6251 		  {
6252 		    wchar_t *range_start_char, *range_end_char;
6253 
6254 		    /* match if (range_start_char <= c <= range_end_char).  */
6255 
6256 		    /* If range_start(or end) < 0, we assume -range_start(end)
6257 		       is the offset of the collating symbol which is specified
6258 		       as the character of the range start(end).  */
6259 
6260 		    /* range_start */
6261 		    if (*workp < 0)
6262 		      range_start_char = charset_top - (*workp++);
6263 		    else
6264 		      {
6265 			str_buf[0] = *workp++;
6266 			range_start_char = str_buf;
6267 		      }
6268 
6269 		    /* range_end */
6270 		    if (*workp < 0)
6271 		      range_end_char = charset_top - (*workp++);
6272 		    else
6273 		      {
6274 			str_buf[4] = *workp++;
6275 			range_end_char = str_buf + 4;
6276 		      }
6277 
6278 		    if (wcscoll(range_start_char, str_buf+2) <= 0 &&
6279 			wcscoll(str_buf+2, range_end_char) <= 0)
6280 
6281 		      goto char_set_matched;
6282 		  }
6283 	      }
6284 
6285             /* match with char?  */
6286 	    for (; workp < p ; workp++)
6287 	      if (c == *workp)
6288 		goto char_set_matched;
6289 
6290 	    not = !not;
6291 
6292 	  char_set_matched:
6293 	    if (not) goto fail;
6294 #else
6295             /* Cast to `unsigned' instead of `unsigned char' in case the
6296                bit list is a full 32 bytes long.  */
6297 	    if (c < (unsigned) (*p * BYTEWIDTH)
6298 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6299 	      not = !not;
6300 
6301 	    p += 1 + *p;
6302 
6303 	    if (!not) goto fail;
6304 #undef WORK_BUFFER_SIZE
6305 #endif /* MBS_SUPPORT */
6306 	    SET_REGS_MATCHED ();
6307             d++;
6308 	    break;
6309 	  }
6310 
6311 
6312         /* The beginning of a group is represented by start_memory.
6313            The arguments are the register number in the next byte, and the
6314            number of groups inner to this one in the next.  The text
6315            matched within the group is recorded (in the internal
6316            registers data structure) under the register number.  */
6317         case start_memory:
6318 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6319 			(long int) *p, (long int) p[1]);
6320 
6321           /* Find out if this group can match the empty string.  */
6322 	  p1 = p;		/* To send to group_match_null_string_p.  */
6323 
6324           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6325             REG_MATCH_NULL_STRING_P (reg_info[*p])
6326               = group_match_null_string_p (&p1, pend, reg_info);
6327 
6328           /* Save the position in the string where we were the last time
6329              we were at this open-group operator in case the group is
6330              operated upon by a repetition operator, e.g., with `(a*)*b'
6331              against `ab'; then we want to ignore where we are now in
6332              the string in case this attempt to match fails.  */
6333           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6334                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6335                              : regstart[*p];
6336 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6337 			 POINTER_TO_OFFSET (old_regstart[*p]));
6338 
6339           regstart[*p] = d;
6340 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6341 
6342           IS_ACTIVE (reg_info[*p]) = 1;
6343           MATCHED_SOMETHING (reg_info[*p]) = 0;
6344 
6345 	  /* Clear this whenever we change the register activity status.  */
6346 	  set_regs_matched_done = 0;
6347 
6348           /* This is the new highest active register.  */
6349           highest_active_reg = *p;
6350 
6351           /* If nothing was active before, this is the new lowest active
6352              register.  */
6353           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6354             lowest_active_reg = *p;
6355 
6356           /* Move past the register number and inner group count.  */
6357           p += 2;
6358 	  just_past_start_mem = p;
6359 
6360           break;
6361 
6362 
6363         /* The stop_memory opcode represents the end of a group.  Its
6364            arguments are the same as start_memory's: the register
6365            number, and the number of inner groups.  */
6366 	case stop_memory:
6367 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6368 			(long int) *p, (long int) p[1]);
6369 
6370           /* We need to save the string position the last time we were at
6371              this close-group operator in case the group is operated
6372              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6373              against `aba'; then we want to ignore where we are now in
6374              the string in case this attempt to match fails.  */
6375           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6376                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
6377 			   : regend[*p];
6378 	  DEBUG_PRINT2 ("      old_regend: %d\n",
6379 			 POINTER_TO_OFFSET (old_regend[*p]));
6380 
6381           regend[*p] = d;
6382 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6383 
6384           /* This register isn't active anymore.  */
6385           IS_ACTIVE (reg_info[*p]) = 0;
6386 
6387 	  /* Clear this whenever we change the register activity status.  */
6388 	  set_regs_matched_done = 0;
6389 
6390           /* If this was the only register active, nothing is active
6391              anymore.  */
6392           if (lowest_active_reg == highest_active_reg)
6393             {
6394               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6395               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6396             }
6397           else
6398             { /* We must scan for the new highest active register, since
6399                  it isn't necessarily one less than now: consider
6400                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6401                  new highest active register is 1.  */
6402               US_CHAR_TYPE r = *p - 1;
6403               while (r > 0 && !IS_ACTIVE (reg_info[r]))
6404                 r--;
6405 
6406               /* If we end up at register zero, that means that we saved
6407                  the registers as the result of an `on_failure_jump', not
6408                  a `start_memory', and we jumped past the innermost
6409                  `stop_memory'.  For example, in ((.)*) we save
6410                  registers 1 and 2 as a result of the *, but when we pop
6411                  back to the second ), we are at the stop_memory 1.
6412                  Thus, nothing is active.  */
6413 	      if (r == 0)
6414                 {
6415                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6416                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6417                 }
6418               else
6419                 highest_active_reg = r;
6420             }
6421 
6422           /* If we just failed to match something this time around with
6423              a group that's operated on by a repetition operator, try to
6424              force exit from the ``loop'', and restore the register
6425              information for this group that we had before trying this
6426              last match.  */
6427           if ((!MATCHED_SOMETHING (reg_info[*p])
6428                || just_past_start_mem == p - 1)
6429 	      && (p + 2) < pend)
6430             {
6431               boolean is_a_jump_n = false;
6432 
6433               p1 = p + 2;
6434               mcnt = 0;
6435               switch ((re_opcode_t) *p1++)
6436                 {
6437                   case jump_n:
6438 		    is_a_jump_n = true;
6439                   case pop_failure_jump:
6440 		  case maybe_pop_jump:
6441 		  case jump:
6442 		  case dummy_failure_jump:
6443                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6444 		    if (is_a_jump_n)
6445 		      p1 += OFFSET_ADDRESS_SIZE;
6446                     break;
6447 
6448                   default:
6449                     /* do nothing */ ;
6450                 }
6451 	      p1 += mcnt;
6452 
6453               /* If the next operation is a jump backwards in the pattern
6454 	         to an on_failure_jump right before the start_memory
6455                  corresponding to this stop_memory, exit from the loop
6456                  by forcing a failure after pushing on the stack the
6457                  on_failure_jump's jump in the pattern, and d.  */
6458               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6459                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6460 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6461 		{
6462                   /* If this group ever matched anything, then restore
6463                      what its registers were before trying this last
6464                      failed match, e.g., with `(a*)*b' against `ab' for
6465                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
6466                      against `aba' for regend[3].
6467 
6468                      Also restore the registers for inner groups for,
6469                      e.g., `((a*)(b*))*' against `aba' (register 3 would
6470                      otherwise get trashed).  */
6471 
6472                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6473 		    {
6474 		      unsigned r;
6475 
6476                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6477 
6478 		      /* Restore this and inner groups' (if any) registers.  */
6479                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6480 			   r++)
6481                         {
6482                           regstart[r] = old_regstart[r];
6483 
6484                           /* xx why this test?  */
6485                           if (old_regend[r] >= regstart[r])
6486                             regend[r] = old_regend[r];
6487                         }
6488                     }
6489 		  p1++;
6490                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6491                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6492 
6493                   goto fail;
6494                 }
6495             }
6496 
6497           /* Move past the register number and the inner group count.  */
6498           p += 2;
6499           break;
6500 
6501 
6502 	/* \<digit> has been turned into a `duplicate' command which is
6503            followed by the numeric value of <digit> as the register number.  */
6504         case duplicate:
6505 	  {
6506 	    register const CHAR_TYPE *d2, *dend2;
6507 	    int regno = *p++;   /* Get which register to match against.  */
6508 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6509 
6510 	    /* Can't back reference a group which we've never matched.  */
6511             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6512               goto fail;
6513 
6514             /* Where in input to try to start matching.  */
6515             d2 = regstart[regno];
6516 
6517             /* Where to stop matching; if both the place to start and
6518                the place to stop matching are in the same string, then
6519                set to the place to stop, otherwise, for now have to use
6520                the end of the first string.  */
6521 
6522             dend2 = ((FIRST_STRING_P (regstart[regno])
6523 		      == FIRST_STRING_P (regend[regno]))
6524 		     ? regend[regno] : end_match_1);
6525 	    for (;;)
6526 	      {
6527 		/* If necessary, advance to next segment in register
6528                    contents.  */
6529 		while (d2 == dend2)
6530 		  {
6531 		    if (dend2 == end_match_2) break;
6532 		    if (dend2 == regend[regno]) break;
6533 
6534                     /* End of string1 => advance to string2. */
6535                     d2 = string2;
6536                     dend2 = regend[regno];
6537 		  }
6538 		/* At end of register contents => success */
6539 		if (d2 == dend2) break;
6540 
6541 		/* If necessary, advance to next segment in data.  */
6542 		PREFETCH ();
6543 
6544 		/* How many characters left in this segment to match.  */
6545 		mcnt = dend - d;
6546 
6547 		/* Want how many consecutive characters we can match in
6548                    one shot, so, if necessary, adjust the count.  */
6549                 if (mcnt > dend2 - d2)
6550 		  mcnt = dend2 - d2;
6551 
6552 		/* Compare that many; failure if mismatch, else move
6553                    past them.  */
6554 		if (translate
6555                     ? bcmp_translate (d, d2, mcnt, translate)
6556                     : memcmp (d, d2, mcnt*sizeof(US_CHAR_TYPE)))
6557 		  goto fail;
6558 		d += mcnt, d2 += mcnt;
6559 
6560 		/* Do this because we've matched some characters.  */
6561 		SET_REGS_MATCHED ();
6562 	      }
6563 	  }
6564 	  break;
6565 
6566 
6567         /* begline matches the empty string at the beginning of the string
6568            (unless `not_bol' is set in `bufp'), and, if
6569            `newline_anchor' is set, after newlines.  */
6570 	case begline:
6571           DEBUG_PRINT1 ("EXECUTING begline.\n");
6572 
6573           if (AT_STRINGS_BEG (d))
6574             {
6575               if (!bufp->not_bol) break;
6576             }
6577           else if (d[-1] == '\n' && bufp->newline_anchor)
6578             {
6579               break;
6580             }
6581           /* In all other cases, we fail.  */
6582           goto fail;
6583 
6584 
6585         /* endline is the dual of begline.  */
6586 	case endline:
6587           DEBUG_PRINT1 ("EXECUTING endline.\n");
6588 
6589           if (AT_STRINGS_END (d))
6590             {
6591               if (!bufp->not_eol) break;
6592             }
6593 
6594           /* We have to ``prefetch'' the next character.  */
6595           else if ((d == end1 ? *string2 : *d) == '\n'
6596                    && bufp->newline_anchor)
6597             {
6598               break;
6599             }
6600           goto fail;
6601 
6602 
6603 	/* Match at the very beginning of the data.  */
6604         case begbuf:
6605           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
6606           if (AT_STRINGS_BEG (d))
6607             break;
6608           goto fail;
6609 
6610 
6611 	/* Match at the very end of the data.  */
6612         case endbuf:
6613           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
6614 	  if (AT_STRINGS_END (d))
6615 	    break;
6616           goto fail;
6617 
6618 
6619         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
6620            pushes NULL as the value for the string on the stack.  Then
6621            `pop_failure_point' will keep the current value for the
6622            string, instead of restoring it.  To see why, consider
6623            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
6624            then the . fails against the \n.  But the next thing we want
6625            to do is match the \n against the \n; if we restored the
6626            string value, we would be back at the foo.
6627 
6628            Because this is used only in specific cases, we don't need to
6629            check all the things that `on_failure_jump' does, to make
6630            sure the right things get saved on the stack.  Hence we don't
6631            share its code.  The only reason to push anything on the
6632            stack at all is that otherwise we would have to change
6633            `anychar's code to do something besides goto fail in this
6634            case; that seems worse than this.  */
6635         case on_failure_keep_string_jump:
6636           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
6637 
6638           EXTRACT_NUMBER_AND_INCR (mcnt, p);
6639 #ifdef _LIBC
6640           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
6641 #else
6642           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
6643 #endif
6644 
6645           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
6646           break;
6647 
6648 
6649 	/* Uses of on_failure_jump:
6650 
6651            Each alternative starts with an on_failure_jump that points
6652            to the beginning of the next alternative.  Each alternative
6653            except the last ends with a jump that in effect jumps past
6654            the rest of the alternatives.  (They really jump to the
6655            ending jump of the following alternative, because tensioning
6656            these jumps is a hassle.)
6657 
6658            Repeats start with an on_failure_jump that points past both
6659            the repetition text and either the following jump or
6660            pop_failure_jump back to this on_failure_jump.  */
6661 	case on_failure_jump:
6662         on_failure:
6663           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
6664 
6665           EXTRACT_NUMBER_AND_INCR (mcnt, p);
6666 #ifdef _LIBC
6667           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
6668 #else
6669           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
6670 #endif
6671 
6672           /* If this on_failure_jump comes right before a group (i.e.,
6673              the original * applied to a group), save the information
6674              for that group and all inner ones, so that if we fail back
6675              to this point, the group's information will be correct.
6676              For example, in \(a*\)*\1, we need the preceding group,
6677              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
6678 
6679           /* We can't use `p' to check ahead because we push
6680              a failure point to `p + mcnt' after we do this.  */
6681           p1 = p;
6682 
6683           /* We need to skip no_op's before we look for the
6684              start_memory in case this on_failure_jump is happening as
6685              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
6686              against aba.  */
6687           while (p1 < pend && (re_opcode_t) *p1 == no_op)
6688             p1++;
6689 
6690           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
6691             {
6692               /* We have a new highest active register now.  This will
6693                  get reset at the start_memory we are about to get to,
6694                  but we will have saved all the registers relevant to
6695                  this repetition op, as described above.  */
6696               highest_active_reg = *(p1 + 1) + *(p1 + 2);
6697               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6698                 lowest_active_reg = *(p1 + 1);
6699             }
6700 
6701           DEBUG_PRINT1 (":\n");
6702           PUSH_FAILURE_POINT (p + mcnt, d, -2);
6703           break;
6704 
6705 
6706         /* A smart repeat ends with `maybe_pop_jump'.
6707 	   We change it to either `pop_failure_jump' or `jump'.  */
6708         case maybe_pop_jump:
6709           EXTRACT_NUMBER_AND_INCR (mcnt, p);
6710           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
6711           {
6712 	    register US_CHAR_TYPE *p2 = p;
6713 
6714             /* Compare the beginning of the repeat with what in the
6715                pattern follows its end.  If we can establish that there
6716                is nothing that they would both match, i.e., nothing that
6717                would force us to backtrack (as in, e.g., `a*a'),
6718                then we can change to pop_failure_jump, because we'll
6719                never have to backtrack.
6720 
6721                This is not true in the case of alternatives: in
6722                `(a|ab)*' we do need to backtrack to the `ab' alternative
6723                (e.g., if the string was `ab').  But instead of trying to
6724                detect that here, the alternative has put on a dummy
6725                failure point which is what we will end up popping.  */
6726 
6727 	    /* Skip over open/close-group commands.
6728 	       If what follows this loop is a ...+ construct,
6729 	       look at what begins its body, since we will have to
6730 	       match at least one of that.  */
6731 	    while (1)
6732 	      {
6733 		if (p2 + 2 < pend
6734 		    && ((re_opcode_t) *p2 == stop_memory
6735 			|| (re_opcode_t) *p2 == start_memory))
6736 		  p2 += 3;
6737 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
6738 			 && (re_opcode_t) *p2 == dummy_failure_jump)
6739 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
6740 		else
6741 		  break;
6742 	      }
6743 
6744 	    p1 = p + mcnt;
6745 	    /* p1[0] ... p1[OFFSET_ADDRESS_SIZE] are the `on_failure_jump'
6746 	       corresponding to the `maybe_pop_jump' of this case.  Examine
6747 	       what follows.  */
6748 
6749             /* If we're at the end of the pattern, we can change.  */
6750             if (p2 == pend)
6751 	      {
6752 		/* Consider what happens when matching ":\(.*\)"
6753 		   against ":/".  I don't really understand this code
6754 		   yet.  */
6755   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
6756 		  pop_failure_jump;
6757                 DEBUG_PRINT1
6758                   ("  End of pattern: change to `pop_failure_jump'.\n");
6759               }
6760 
6761             else if ((re_opcode_t) *p2 == exactn
6762 #ifdef MBS_SUPPORT
6763 		     || (re_opcode_t) *p2 == exactn_bin
6764 #endif
6765 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
6766 	      {
6767 		register US_CHAR_TYPE c
6768                   = *p2 == (US_CHAR_TYPE) endline ? '\n' : p2[2];
6769 
6770                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
6771 #ifdef MBS_SUPPORT
6772 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
6773 #endif
6774 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
6775                   {
6776   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (US_CHAR_TYPE)
6777 		      pop_failure_jump;
6778 #ifdef MBS_SUPPORT
6779 		    if (MB_CUR_MAX != 1)
6780 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
6781 				    (wint_t) c,
6782 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
6783 		    else
6784 #endif
6785 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
6786 				    (char) c,
6787 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
6788                   }
6789 
6790 #ifndef MBS_SUPPORT
6791 		else if ((re_opcode_t) p1[3] == charset
6792 			 || (re_opcode_t) p1[3] == charset_not)
6793 		  {
6794 		    int not = (re_opcode_t) p1[3] == charset_not;
6795 
6796 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
6797 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6798 		      not = !not;
6799 
6800                     /* `not' is equal to 1 if c would match, which means
6801                         that we can't change to pop_failure_jump.  */
6802 		    if (!not)
6803                       {
6804   		        p[-3] = (unsigned char) pop_failure_jump;
6805                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
6806                       }
6807 		  }
6808 #endif /* not MBS_SUPPORT */
6809 	      }
6810 #ifndef MBS_SUPPORT
6811             else if ((re_opcode_t) *p2 == charset)
6812 	      {
6813 		/* We win if the first character of the loop is not part
6814                    of the charset.  */
6815                 if ((re_opcode_t) p1[3] == exactn
6816  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
6817  			  && (p2[2 + p1[5] / BYTEWIDTH]
6818  			      & (1 << (p1[5] % BYTEWIDTH)))))
6819 		  {
6820 		    p[-3] = (unsigned char) pop_failure_jump;
6821 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
6822                   }
6823 
6824 		else if ((re_opcode_t) p1[3] == charset_not)
6825 		  {
6826 		    int idx;
6827 		    /* We win if the charset_not inside the loop
6828 		       lists every character listed in the charset after.  */
6829 		    for (idx = 0; idx < (int) p2[1]; idx++)
6830 		      if (! (p2[2 + idx] == 0
6831 			     || (idx < (int) p1[4]
6832 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
6833 			break;
6834 
6835 		    if (idx == p2[1])
6836                       {
6837   		        p[-3] = (unsigned char) pop_failure_jump;
6838                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
6839                       }
6840 		  }
6841 		else if ((re_opcode_t) p1[3] == charset)
6842 		  {
6843 		    int idx;
6844 		    /* We win if the charset inside the loop
6845 		       has no overlap with the one after the loop.  */
6846 		    for (idx = 0;
6847 			 idx < (int) p2[1] && idx < (int) p1[4];
6848 			 idx++)
6849 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
6850 			break;
6851 
6852 		    if (idx == p2[1] || idx == p1[4])
6853                       {
6854   		        p[-3] = (unsigned char) pop_failure_jump;
6855                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
6856                       }
6857 		  }
6858 	      }
6859 #endif /* not MBS_SUPPORT */
6860 	  }
6861 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
6862 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
6863 	    {
6864 	      p[-1] = (US_CHAR_TYPE) jump;
6865               DEBUG_PRINT1 ("  Match => jump.\n");
6866 	      goto unconditional_jump;
6867 	    }
6868         /* Note fall through.  */
6869 
6870 
6871 	/* The end of a simple repeat has a pop_failure_jump back to
6872            its matching on_failure_jump, where the latter will push a
6873            failure point.  The pop_failure_jump takes off failure
6874            points put on by this pop_failure_jump's matching
6875            on_failure_jump; we got through the pattern to here from the
6876            matching on_failure_jump, so didn't fail.  */
6877         case pop_failure_jump:
6878           {
6879             /* We need to pass separate storage for the lowest and
6880                highest registers, even though we don't care about the
6881                actual values.  Otherwise, we will restore only one
6882                register from the stack, since lowest will == highest in
6883                `pop_failure_point'.  */
6884             active_reg_t dummy_low_reg, dummy_high_reg;
6885             US_CHAR_TYPE *pdummy = NULL;
6886             const CHAR_TYPE *sdummy = NULL;
6887 
6888             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
6889             POP_FAILURE_POINT (sdummy, pdummy,
6890                                dummy_low_reg, dummy_high_reg,
6891                                reg_dummy, reg_dummy, reg_info_dummy);
6892           }
6893 	  /* Note fall through.  */
6894 
6895 	unconditional_jump:
6896 #ifdef _LIBC
6897 	  DEBUG_PRINT2 ("\n%p: ", p);
6898 #else
6899 	  DEBUG_PRINT2 ("\n0x%x: ", p);
6900 #endif
6901           /* Note fall through.  */
6902 
6903         /* Unconditionally jump (without popping any failure points).  */
6904         case jump:
6905 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
6906           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
6907 	  p += mcnt;				/* Do the jump.  */
6908 #ifdef _LIBC
6909           DEBUG_PRINT2 ("(to %p).\n", p);
6910 #else
6911           DEBUG_PRINT2 ("(to 0x%x).\n", p);
6912 #endif
6913 	  break;
6914 
6915 
6916         /* We need this opcode so we can detect where alternatives end
6917            in `group_match_null_string_p' et al.  */
6918         case jump_past_alt:
6919           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
6920           goto unconditional_jump;
6921 
6922 
6923         /* Normally, the on_failure_jump pushes a failure point, which
6924            then gets popped at pop_failure_jump.  We will end up at
6925            pop_failure_jump, also, and with a pattern of, say, `a+', we
6926            are skipping over the on_failure_jump, so we have to push
6927            something meaningless for pop_failure_jump to pop.  */
6928         case dummy_failure_jump:
6929           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
6930           /* It doesn't matter what we push for the string here.  What
6931              the code at `fail' tests is the value for the pattern.  */
6932           PUSH_FAILURE_POINT (NULL, NULL, -2);
6933           goto unconditional_jump;
6934 
6935 
6936         /* At the end of an alternative, we need to push a dummy failure
6937            point in case we are followed by a `pop_failure_jump', because
6938            we don't want the failure point for the alternative to be
6939            popped.  For example, matching `(a|ab)*' against `aab'
6940            requires that we match the `ab' alternative.  */
6941         case push_dummy_failure:
6942           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
6943           /* See comments just above at `dummy_failure_jump' about the
6944              two zeroes.  */
6945           PUSH_FAILURE_POINT (NULL, NULL, -2);
6946           break;
6947 
6948         /* Have to succeed matching what follows at least n times.
6949            After that, handle like `on_failure_jump'.  */
6950         case succeed_n:
6951           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
6952           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
6953 
6954           assert (mcnt >= 0);
6955           /* Originally, this is how many times we HAVE to succeed.  */
6956           if (mcnt > 0)
6957             {
6958                mcnt--;
6959 	       p += OFFSET_ADDRESS_SIZE;
6960                STORE_NUMBER_AND_INCR (p, mcnt);
6961 #ifdef _LIBC
6962                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
6963 			     , mcnt);
6964 #else
6965                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
6966 			     , mcnt);
6967 #endif
6968             }
6969 	  else if (mcnt == 0)
6970             {
6971 #ifdef _LIBC
6972               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
6973 			    p + OFFSET_ADDRESS_SIZE);
6974 #else
6975               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
6976 			    p + OFFSET_ADDRESS_SIZE);
6977 #endif /* _LIBC */
6978 
6979 #ifdef MBS_SUPPORT
6980 	      p[1] = (US_CHAR_TYPE) no_op;
6981 #else
6982 	      p[2] = (US_CHAR_TYPE) no_op;
6983               p[3] = (US_CHAR_TYPE) no_op;
6984 #endif /* MBS_SUPPORT */
6985               goto on_failure;
6986             }
6987           break;
6988 
6989         case jump_n:
6990           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
6991           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
6992 
6993           /* Originally, this is how many times we CAN jump.  */
6994           if (mcnt)
6995             {
6996                mcnt--;
6997                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
6998 
6999 #ifdef _LIBC
7000                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7001 			     mcnt);
7002 #else
7003                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7004 			     mcnt);
7005 #endif /* _LIBC */
7006 	       goto unconditional_jump;
7007             }
7008           /* If don't have to jump any more, skip over the rest of command.  */
7009 	  else
7010 	    p += 2 * OFFSET_ADDRESS_SIZE;
7011           break;
7012 
7013 	case set_number_at:
7014 	  {
7015             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7016 
7017             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7018             p1 = p + mcnt;
7019             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7020 #ifdef _LIBC
7021             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7022 #else
7023             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7024 #endif
7025 	    STORE_NUMBER (p1, mcnt);
7026             break;
7027           }
7028 
7029 #if 0
7030 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7031 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7032 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7033 	   macro and introducing temporary variables works around the bug.  */
7034 
7035 	case wordbound:
7036 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7037 	  if (AT_WORD_BOUNDARY (d))
7038 	    break;
7039 	  goto fail;
7040 
7041 	case notwordbound:
7042 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7043 	  if (AT_WORD_BOUNDARY (d))
7044 	    goto fail;
7045 	  break;
7046 #else
7047 	case wordbound:
7048 	{
7049 	  boolean prevchar, thischar;
7050 
7051 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7052 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7053 	    break;
7054 
7055 	  prevchar = WORDCHAR_P (d - 1);
7056 	  thischar = WORDCHAR_P (d);
7057 	  if (prevchar != thischar)
7058 	    break;
7059 	  goto fail;
7060 	}
7061 
7062       case notwordbound:
7063 	{
7064 	  boolean prevchar, thischar;
7065 
7066 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7067 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7068 	    goto fail;
7069 
7070 	  prevchar = WORDCHAR_P (d - 1);
7071 	  thischar = WORDCHAR_P (d);
7072 	  if (prevchar != thischar)
7073 	    goto fail;
7074 	  break;
7075 	}
7076 #endif
7077 
7078 	case wordbeg:
7079           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7080 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7081 	    break;
7082           goto fail;
7083 
7084 	case wordend:
7085           DEBUG_PRINT1 ("EXECUTING wordend.\n");
7086 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7087               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
7088 	    break;
7089           goto fail;
7090 
7091 #ifdef emacs
7092   	case before_dot:
7093           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7094  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7095   	    goto fail;
7096   	  break;
7097 
7098   	case at_dot:
7099           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7100  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7101   	    goto fail;
7102   	  break;
7103 
7104   	case after_dot:
7105           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7106           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7107   	    goto fail;
7108   	  break;
7109 
7110 	case syntaxspec:
7111           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7112 	  mcnt = *p++;
7113 	  goto matchsyntax;
7114 
7115         case wordchar:
7116           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7117 	  mcnt = (int) Sword;
7118         matchsyntax:
7119 	  PREFETCH ();
7120 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7121 	  d++;
7122 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7123 	    goto fail;
7124           SET_REGS_MATCHED ();
7125 	  break;
7126 
7127 	case notsyntaxspec:
7128           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7129 	  mcnt = *p++;
7130 	  goto matchnotsyntax;
7131 
7132         case notwordchar:
7133           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7134 	  mcnt = (int) Sword;
7135         matchnotsyntax:
7136 	  PREFETCH ();
7137 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7138 	  d++;
7139 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7140 	    goto fail;
7141 	  SET_REGS_MATCHED ();
7142           break;
7143 
7144 #else /* not emacs */
7145 	case wordchar:
7146           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7147 	  PREFETCH ();
7148           if (!WORDCHAR_P (d))
7149             goto fail;
7150 	  SET_REGS_MATCHED ();
7151           d++;
7152 	  break;
7153 
7154 	case notwordchar:
7155           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7156 	  PREFETCH ();
7157 	  if (WORDCHAR_P (d))
7158             goto fail;
7159           SET_REGS_MATCHED ();
7160           d++;
7161 	  break;
7162 #endif /* not emacs */
7163 
7164         default:
7165           abort ();
7166 	}
7167       continue;  /* Successfully executed one pattern command; keep going.  */
7168 
7169 
7170     /* We goto here if a matching operation fails. */
7171     fail:
7172       if (!FAIL_STACK_EMPTY ())
7173 	{ /* A restart point is known.  Restore to that state.  */
7174           DEBUG_PRINT1 ("\nFAIL:\n");
7175           POP_FAILURE_POINT (d, p,
7176                              lowest_active_reg, highest_active_reg,
7177                              regstart, regend, reg_info);
7178 
7179           /* If this failure point is a dummy, try the next one.  */
7180           if (!p)
7181 	    goto fail;
7182 
7183           /* If we failed to the end of the pattern, don't examine *p.  */
7184 	  assert (p <= pend);
7185           if (p < pend)
7186             {
7187               boolean is_a_jump_n = false;
7188 
7189               /* If failed to a backwards jump that's part of a repetition
7190                  loop, need to pop this failure point and use the next one.  */
7191               switch ((re_opcode_t) *p)
7192                 {
7193                 case jump_n:
7194                   is_a_jump_n = true;
7195                 case maybe_pop_jump:
7196                 case pop_failure_jump:
7197                 case jump:
7198                   p1 = p + 1;
7199                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7200                   p1 += mcnt;
7201 
7202                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7203                       || (!is_a_jump_n
7204                           && (re_opcode_t) *p1 == on_failure_jump))
7205                     goto fail;
7206                   break;
7207                 default:
7208                   /* do nothing */ ;
7209                 }
7210             }
7211 
7212           if (d >= string1 && d <= end1)
7213 	    dend = end_match_1;
7214         }
7215       else
7216         break;   /* Matching at this starting point really fails.  */
7217     } /* for (;;) */
7218 
7219   if (best_regs_set)
7220     goto restore_best_regs;
7221 
7222   FREE_VARIABLES ();
7223 
7224   return -1;         			/* Failure to match.  */
7225 } /* re_match_2 */
7226 
7227 /* Subroutine definitions for re_match_2.  */
7228 
7229 
7230 /* We are passed P pointing to a register number after a start_memory.
7231 
7232    Return true if the pattern up to the corresponding stop_memory can
7233    match the empty string, and false otherwise.
7234 
7235    If we find the matching stop_memory, sets P to point to one past its number.
7236    Otherwise, sets P to an undefined byte less than or equal to END.
7237 
7238    We don't handle duplicates properly (yet).  */
7239 
7240 static boolean
7241 group_match_null_string_p (p, end, reg_info)
7242     US_CHAR_TYPE **p, *end;
7243     register_info_type *reg_info;
7244 {
7245   int mcnt;
7246   /* Point to after the args to the start_memory.  */
7247   US_CHAR_TYPE *p1 = *p + 2;
7248 
7249   while (p1 < end)
7250     {
7251       /* Skip over opcodes that can match nothing, and return true or
7252 	 false, as appropriate, when we get to one that can't, or to the
7253          matching stop_memory.  */
7254 
7255       switch ((re_opcode_t) *p1)
7256         {
7257         /* Could be either a loop or a series of alternatives.  */
7258         case on_failure_jump:
7259           p1++;
7260           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7261 
7262           /* If the next operation is not a jump backwards in the
7263 	     pattern.  */
7264 
7265 	  if (mcnt >= 0)
7266 	    {
7267               /* Go through the on_failure_jumps of the alternatives,
7268                  seeing if any of the alternatives cannot match nothing.
7269                  The last alternative starts with only a jump,
7270                  whereas the rest start with on_failure_jump and end
7271                  with a jump, e.g., here is the pattern for `a|b|c':
7272 
7273                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7274                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7275                  /exactn/1/c
7276 
7277                  So, we have to first go through the first (n-1)
7278                  alternatives and then deal with the last one separately.  */
7279 
7280 
7281               /* Deal with the first (n-1) alternatives, which start
7282                  with an on_failure_jump (see above) that jumps to right
7283                  past a jump_past_alt.  */
7284 
7285               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7286 		     jump_past_alt)
7287                 {
7288                   /* `mcnt' holds how many bytes long the alternative
7289                      is, including the ending `jump_past_alt' and
7290                      its number.  */
7291 
7292                   if (!alt_match_null_string_p (p1, p1 + mcnt -
7293 						(1 + OFFSET_ADDRESS_SIZE),
7294 						reg_info))
7295                     return false;
7296 
7297                   /* Move to right after this alternative, including the
7298 		     jump_past_alt.  */
7299                   p1 += mcnt;
7300 
7301                   /* Break if it's the beginning of an n-th alternative
7302                      that doesn't begin with an on_failure_jump.  */
7303                   if ((re_opcode_t) *p1 != on_failure_jump)
7304                     break;
7305 
7306 		  /* Still have to check that it's not an n-th
7307 		     alternative that starts with an on_failure_jump.  */
7308 		  p1++;
7309                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7310                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7311 		      jump_past_alt)
7312                     {
7313 		      /* Get to the beginning of the n-th alternative.  */
7314                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
7315                       break;
7316                     }
7317                 }
7318 
7319               /* Deal with the last alternative: go back and get number
7320                  of the `jump_past_alt' just before it.  `mcnt' contains
7321                  the length of the alternative.  */
7322               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7323 
7324               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
7325                 return false;
7326 
7327               p1 += mcnt;	/* Get past the n-th alternative.  */
7328             } /* if mcnt > 0 */
7329           break;
7330 
7331 
7332         case stop_memory:
7333 	  assert (p1[1] == **p);
7334           *p = p1 + 2;
7335           return true;
7336 
7337 
7338         default:
7339           if (!common_op_match_null_string_p (&p1, end, reg_info))
7340             return false;
7341         }
7342     } /* while p1 < end */
7343 
7344   return false;
7345 } /* group_match_null_string_p */
7346 
7347 
7348 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7349    It expects P to be the first byte of a single alternative and END one
7350    byte past the last. The alternative can contain groups.  */
7351 
7352 static boolean
7353 alt_match_null_string_p (p, end, reg_info)
7354     US_CHAR_TYPE *p, *end;
7355     register_info_type *reg_info;
7356 {
7357   int mcnt;
7358   US_CHAR_TYPE *p1 = p;
7359 
7360   while (p1 < end)
7361     {
7362       /* Skip over opcodes that can match nothing, and break when we get
7363          to one that can't.  */
7364 
7365       switch ((re_opcode_t) *p1)
7366         {
7367 	/* It's a loop.  */
7368         case on_failure_jump:
7369           p1++;
7370           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7371           p1 += mcnt;
7372           break;
7373 
7374 	default:
7375           if (!common_op_match_null_string_p (&p1, end, reg_info))
7376             return false;
7377         }
7378     }  /* while p1 < end */
7379 
7380   return true;
7381 } /* alt_match_null_string_p */
7382 
7383 
7384 /* Deals with the ops common to group_match_null_string_p and
7385    alt_match_null_string_p.
7386 
7387    Sets P to one after the op and its arguments, if any.  */
7388 
7389 static boolean
7390 common_op_match_null_string_p (p, end, reg_info)
7391     US_CHAR_TYPE **p, *end;
7392     register_info_type *reg_info;
7393 {
7394   int mcnt;
7395   boolean ret;
7396   int reg_no;
7397   US_CHAR_TYPE *p1 = *p;
7398 
7399   switch ((re_opcode_t) *p1++)
7400     {
7401     case no_op:
7402     case begline:
7403     case endline:
7404     case begbuf:
7405     case endbuf:
7406     case wordbeg:
7407     case wordend:
7408     case wordbound:
7409     case notwordbound:
7410 #ifdef emacs
7411     case before_dot:
7412     case at_dot:
7413     case after_dot:
7414 #endif
7415       break;
7416 
7417     case start_memory:
7418       reg_no = *p1;
7419       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7420       ret = group_match_null_string_p (&p1, end, reg_info);
7421 
7422       /* Have to set this here in case we're checking a group which
7423          contains a group and a back reference to it.  */
7424 
7425       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7426         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7427 
7428       if (!ret)
7429         return false;
7430       break;
7431 
7432     /* If this is an optimized succeed_n for zero times, make the jump.  */
7433     case jump:
7434       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7435       if (mcnt >= 0)
7436         p1 += mcnt;
7437       else
7438         return false;
7439       break;
7440 
7441     case succeed_n:
7442       /* Get to the number of times to succeed.  */
7443       p1 += OFFSET_ADDRESS_SIZE;
7444       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7445 
7446       if (mcnt == 0)
7447         {
7448           p1 -= 2 * OFFSET_ADDRESS_SIZE;
7449           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7450           p1 += mcnt;
7451         }
7452       else
7453         return false;
7454       break;
7455 
7456     case duplicate:
7457       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7458         return false;
7459       break;
7460 
7461     case set_number_at:
7462       p1 += 2 * OFFSET_ADDRESS_SIZE;
7463 
7464     default:
7465       /* All other opcodes mean we cannot match the empty string.  */
7466       return false;
7467   }
7468 
7469   *p = p1;
7470   return true;
7471 } /* common_op_match_null_string_p */
7472 
7473 
7474 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7475    bytes; nonzero otherwise.  */
7476 
7477 static int
7478 bcmp_translate (s1, s2, len, translate)
7479      const CHAR_TYPE *s1, *s2;
7480      register int len;
7481      RE_TRANSLATE_TYPE translate;
7482 {
7483   register const US_CHAR_TYPE *p1 = (const US_CHAR_TYPE *) s1;
7484   register const US_CHAR_TYPE *p2 = (const US_CHAR_TYPE *) s2;
7485   while (len)
7486     {
7487 #ifdef MBS_SUPPORT
7488       if (((*p1<=0xff)?translate[*p1++]:*p1++)
7489 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7490 	return 1;
7491 #else
7492       if (translate[*p1++] != translate[*p2++]) return 1;
7493 #endif /* MBS_SUPPORT */
7494       len--;
7495     }
7496   return 0;
7497 }
7498 
7499 /* Entry points for GNU code.  */
7500 
7501 /* re_compile_pattern is the GNU regular expression compiler: it
7502    compiles PATTERN (of length SIZE) and puts the result in BUFP.
7503    Returns 0 if the pattern was valid, otherwise an error string.
7504 
7505    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7506    are set in BUFP on entry.
7507 
7508    We call regex_compile to do the actual compilation.  */
7509 
7510 const char *
7511 re_compile_pattern (pattern, length, bufp)
7512      const char *pattern;
7513      size_t length;
7514      struct re_pattern_buffer *bufp;
7515 {
7516   reg_errcode_t ret;
7517 
7518   /* GNU code is written to assume at least RE_NREGS registers will be set
7519      (and at least one extra will be -1).  */
7520   bufp->regs_allocated = REGS_UNALLOCATED;
7521 
7522   /* And GNU code determines whether or not to get register information
7523      by passing null for the REGS argument to re_match, etc., not by
7524      setting no_sub.  */
7525   bufp->no_sub = 0;
7526 
7527   /* Match anchors at newline.  */
7528   bufp->newline_anchor = 1;
7529 
7530   ret = regex_compile (pattern, length, re_syntax_options, bufp);
7531 
7532   if (!ret)
7533     return NULL;
7534   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
7535 }
7536 #ifdef _LIBC
7537 weak_alias (__re_compile_pattern, re_compile_pattern)
7538 #endif
7539 
7540 /* Entry points compatible with 4.2 BSD regex library.  We don't define
7541    them unless specifically requested.  */
7542 
7543 #if defined _REGEX_RE_COMP || defined _LIBC
7544 
7545 /* BSD has one and only one pattern buffer.  */
7546 static struct re_pattern_buffer re_comp_buf;
7547 
7548 char *
7549 #ifdef _LIBC
7550 /* Make these definitions weak in libc, so POSIX programs can redefine
7551    these names if they don't use our functions, and still use
7552    regcomp/regexec below without link errors.  */
7553 weak_function
7554 #endif
7555 re_comp (s)
7556     const char *s;
7557 {
7558   reg_errcode_t ret;
7559 
7560   if (!s)
7561     {
7562       if (!re_comp_buf.buffer)
7563 	return gettext ("No previous regular expression");
7564       return 0;
7565     }
7566 
7567   if (!re_comp_buf.buffer)
7568     {
7569       re_comp_buf.buffer = (unsigned char *) malloc (200);
7570       if (re_comp_buf.buffer == NULL)
7571         return (char *) gettext (re_error_msgid
7572 				 + re_error_msgid_idx[(int) REG_ESPACE]);
7573       re_comp_buf.allocated = 200;
7574 
7575       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
7576       if (re_comp_buf.fastmap == NULL)
7577 	return (char *) gettext (re_error_msgid
7578 				 + re_error_msgid_idx[(int) REG_ESPACE]);
7579     }
7580 
7581   /* Since `re_exec' always passes NULL for the `regs' argument, we
7582      don't need to initialize the pattern buffer fields which affect it.  */
7583 
7584   /* Match anchors at newlines.  */
7585   re_comp_buf.newline_anchor = 1;
7586 
7587   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7588 
7589   if (!ret)
7590     return NULL;
7591 
7592   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
7593   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
7594 }
7595 
7596 
7597 int
7598 #ifdef _LIBC
7599 weak_function
7600 #endif
7601 re_exec (s)
7602     const char *s;
7603 {
7604   const int len = strlen (s);
7605   return
7606     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
7607 }
7608 
7609 #endif /* _REGEX_RE_COMP */
7610 
7611 /* POSIX.2 functions.  Don't define these for Emacs.  */
7612 
7613 #ifndef emacs
7614 
7615 /* regcomp takes a regular expression as a string and compiles it.
7616 
7617    PREG is a regex_t *.  We do not expect any fields to be initialized,
7618    since POSIX says we shouldn't.  Thus, we set
7619 
7620      `buffer' to the compiled pattern;
7621      `used' to the length of the compiled pattern;
7622      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7623        REG_EXTENDED bit in CFLAGS is set; otherwise, to
7624        RE_SYNTAX_POSIX_BASIC;
7625      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7626      `fastmap' to an allocated space for the fastmap;
7627      `fastmap_accurate' to zero;
7628      `re_nsub' to the number of subexpressions in PATTERN.
7629 
7630    PATTERN is the address of the pattern string.
7631 
7632    CFLAGS is a series of bits which affect compilation.
7633 
7634      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7635      use POSIX basic syntax.
7636 
7637      If REG_NEWLINE is set, then . and [^...] don't match newline.
7638      Also, regexec will try a match beginning after every newline.
7639 
7640      If REG_ICASE is set, then we considers upper- and lowercase
7641      versions of letters to be equivalent when matching.
7642 
7643      If REG_NOSUB is set, then when PREG is passed to regexec, that
7644      routine will report only success or failure, and nothing about the
7645      registers.
7646 
7647    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
7648    the return codes and their meanings.)  */
7649 
7650 int
7651 regcomp (preg, pattern, cflags)
7652     regex_t *preg;
7653     const char *pattern;
7654     int cflags;
7655 {
7656   reg_errcode_t ret;
7657   reg_syntax_t syntax
7658     = (cflags & REG_EXTENDED) ?
7659       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7660 
7661   /* regex_compile will allocate the space for the compiled pattern.  */
7662   preg->buffer = 0;
7663   preg->allocated = 0;
7664   preg->used = 0;
7665 
7666   /* Try to allocate space for the fastmap.  */
7667   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
7668 
7669   if (cflags & REG_ICASE)
7670     {
7671       unsigned i;
7672 
7673       preg->translate
7674 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
7675 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
7676       if (preg->translate == NULL)
7677         return (int) REG_ESPACE;
7678 
7679       /* Map uppercase characters to corresponding lowercase ones.  */
7680       for (i = 0; i < CHAR_SET_SIZE; i++)
7681         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
7682     }
7683   else
7684     preg->translate = NULL;
7685 
7686   /* If REG_NEWLINE is set, newlines are treated differently.  */
7687   if (cflags & REG_NEWLINE)
7688     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
7689       syntax &= ~RE_DOT_NEWLINE;
7690       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7691       /* It also changes the matching behavior.  */
7692       preg->newline_anchor = 1;
7693     }
7694   else
7695     preg->newline_anchor = 0;
7696 
7697   preg->no_sub = !!(cflags & REG_NOSUB);
7698 
7699   /* POSIX says a null character in the pattern terminates it, so we
7700      can use strlen here in compiling the pattern.  */
7701   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
7702 
7703   /* POSIX doesn't distinguish between an unmatched open-group and an
7704      unmatched close-group: both are REG_EPAREN.  */
7705   if (ret == REG_ERPAREN) ret = REG_EPAREN;
7706 
7707   if (ret == REG_NOERROR && preg->fastmap)
7708     {
7709       /* Compute the fastmap now, since regexec cannot modify the pattern
7710 	 buffer.  */
7711       if (re_compile_fastmap (preg) == -2)
7712 	{
7713 	  /* Some error occurred while computing the fastmap, just forget
7714 	     about it.  */
7715 	  free (preg->fastmap);
7716 	  preg->fastmap = NULL;
7717 	}
7718     }
7719 
7720   return (int) ret;
7721 }
7722 #ifdef _LIBC
7723 weak_alias (__regcomp, regcomp)
7724 #endif
7725 
7726 
7727 /* regexec searches for a given pattern, specified by PREG, in the
7728    string STRING.
7729 
7730    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
7731    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
7732    least NMATCH elements, and we set them to the offsets of the
7733    corresponding matched substrings.
7734 
7735    EFLAGS specifies `execution flags' which affect matching: if
7736    REG_NOTBOL is set, then ^ does not match at the beginning of the
7737    string; if REG_NOTEOL is set, then $ does not match at the end.
7738 
7739    We return 0 if we find a match and REG_NOMATCH if not.  */
7740 
7741 int
7742 regexec (preg, string, nmatch, pmatch, eflags)
7743     const regex_t *preg;
7744     const char *string;
7745     size_t nmatch;
7746     regmatch_t pmatch[];
7747     int eflags;
7748 {
7749   int ret;
7750   struct re_registers regs;
7751   regex_t private_preg;
7752   int len = strlen (string);
7753   boolean want_reg_info = !preg->no_sub && nmatch > 0;
7754 
7755   private_preg = *preg;
7756 
7757   private_preg.not_bol = !!(eflags & REG_NOTBOL);
7758   private_preg.not_eol = !!(eflags & REG_NOTEOL);
7759 
7760   /* The user has told us exactly how many registers to return
7761      information about, via `nmatch'.  We have to pass that on to the
7762      matching routines.  */
7763   private_preg.regs_allocated = REGS_FIXED;
7764 
7765   if (want_reg_info)
7766     {
7767       regs.num_regs = nmatch;
7768       regs.start = TALLOC (nmatch * 2, regoff_t);
7769       if (regs.start == NULL)
7770         return (int) REG_NOMATCH;
7771       regs.end = regs.start + nmatch;
7772     }
7773 
7774   /* Perform the searching operation.  */
7775   ret = re_search (&private_preg, string, len,
7776                    /* start: */ 0, /* range: */ len,
7777                    want_reg_info ? &regs : (struct re_registers *) 0);
7778 
7779   /* Copy the register information to the POSIX structure.  */
7780   if (want_reg_info)
7781     {
7782       if (ret >= 0)
7783         {
7784           unsigned r;
7785 
7786           for (r = 0; r < nmatch; r++)
7787             {
7788               pmatch[r].rm_so = regs.start[r];
7789               pmatch[r].rm_eo = regs.end[r];
7790             }
7791         }
7792 
7793       /* If we needed the temporary register info, free the space now.  */
7794       free (regs.start);
7795     }
7796 
7797   /* We want zero return to mean success, unlike `re_search'.  */
7798   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
7799 }
7800 #ifdef _LIBC
7801 weak_alias (__regexec, regexec)
7802 #endif
7803 
7804 
7805 /* Returns a message corresponding to an error code, ERRCODE, returned
7806    from either regcomp or regexec.   We don't use PREG here.  */
7807 
7808 size_t
7809 regerror (errcode, preg, errbuf, errbuf_size)
7810     int errcode;
7811     const regex_t *preg;
7812     char *errbuf;
7813     size_t errbuf_size;
7814 {
7815   const char *msg;
7816   size_t msg_size;
7817 
7818   if (errcode < 0
7819       || errcode >= (int) (sizeof (re_error_msgid_idx)
7820 			   / sizeof (re_error_msgid_idx[0])))
7821     /* Only error codes returned by the rest of the code should be passed
7822        to this routine.  If we are given anything else, or if other regex
7823        code generates an invalid error code, then the program has a bug.
7824        Dump core so we can fix it.  */
7825     abort ();
7826 
7827   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
7828 
7829   msg_size = strlen (msg) + 1; /* Includes the null.  */
7830 
7831   if (errbuf_size != 0)
7832     {
7833       if (msg_size > errbuf_size)
7834         {
7835 #if defined HAVE_MEMPCPY || defined _LIBC
7836 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
7837 #else
7838           memcpy (errbuf, msg, errbuf_size - 1);
7839           errbuf[errbuf_size - 1] = 0;
7840 #endif
7841         }
7842       else
7843         memcpy (errbuf, msg, msg_size);
7844     }
7845 
7846   return msg_size;
7847 }
7848 #ifdef _LIBC
7849 weak_alias (__regerror, regerror)
7850 #endif
7851 
7852 
7853 /* Free dynamically allocated space used by PREG.  */
7854 
7855 void
7856 regfree (preg)
7857     regex_t *preg;
7858 {
7859   if (preg->buffer != NULL)
7860     free (preg->buffer);
7861   preg->buffer = NULL;
7862 
7863   preg->allocated = 0;
7864   preg->used = 0;
7865 
7866   if (preg->fastmap != NULL)
7867     free (preg->fastmap);
7868   preg->fastmap = NULL;
7869   preg->fastmap_accurate = 0;
7870 
7871   if (preg->translate != NULL)
7872     free (preg->translate);
7873   preg->translate = NULL;
7874 }
7875 #ifdef _LIBC
7876 weak_alias (__regfree, regfree)
7877 #endif
7878 
7879 #endif /* not emacs  */
7880