xref: /haiku/src/system/libroot/posix/glibc/regex/regexec.c (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002, 2003, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15 
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, write to the Free
18    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19    02111-1307 USA.  */
20 
21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 				     int n) internal_function;
23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24 static void match_ctx_free (re_match_context_t *cache) internal_function;
25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
26 					  int str_idx, int from, int to)
27      internal_function;
28 static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
29      internal_function;
30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
31 					   int str_idx) internal_function;
32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33 						   int node, int str_idx)
34      internal_function;
35 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36 			   re_dfastate_t **limited_sts, int last_node,
37 			   int last_str_idx)
38      internal_function;
39 static reg_errcode_t re_search_internal (const regex_t *preg,
40 					 const char *string, int length,
41 					 int start, int range, int stop,
42 					 size_t nmatch, regmatch_t pmatch[],
43 					 int eflags) internal_function;
44 static int re_search_2_stub (struct re_pattern_buffer *bufp,
45 			     const char *string1, int length1,
46 			     const char *string2, int length2,
47 			     int start, int range, struct re_registers *regs,
48 			     int stop, int ret_len) internal_function;
49 static int re_search_stub (struct re_pattern_buffer *bufp,
50 			   const char *string, int length, int start,
51 			   int range, int stop, struct re_registers *regs,
52 			   int ret_len) internal_function;
53 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
54 			      int nregs, int regs_allocated) internal_function;
55 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
56      internal_function;
57 static int check_matching (re_match_context_t *mctx, int fl_longest_match,
58 			   int *p_match_first) internal_function;
59 static int check_halt_state_context (const re_match_context_t *mctx,
60 				     const re_dfastate_t *state, int idx)
61      internal_function;
62 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
63 			 regmatch_t *prev_idx_match, int cur_node,
64 			 int cur_idx, int nmatch) internal_function;
65 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
66 				      int str_idx, int dest_node, int nregs,
67 				      regmatch_t *regs,
68 				      re_node_set *eps_via_nodes)
69      internal_function;
70 static reg_errcode_t set_regs (const regex_t *preg,
71 			       const re_match_context_t *mctx,
72 			       size_t nmatch, regmatch_t *pmatch,
73 			       int fl_backtrack) internal_function;
74 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
75      internal_function;
76 
77 #ifdef RE_ENABLE_I18N
78 static int sift_states_iter_mb (const re_match_context_t *mctx,
79 				re_sift_context_t *sctx,
80 				int node_idx, int str_idx, int max_str_idx)
81      internal_function;
82 #endif /* RE_ENABLE_I18N */
83 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
84 					   re_sift_context_t *sctx)
85      internal_function;
86 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
87 					  re_sift_context_t *sctx, int str_idx,
88 					  re_node_set *cur_dest)
89      internal_function;
90 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
91 					      re_sift_context_t *sctx,
92 					      int str_idx,
93 					      re_node_set *dest_nodes)
94      internal_function;
95 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
96 					    re_node_set *dest_nodes,
97 					    const re_node_set *candidates)
98      internal_function;
99 static int check_dst_limits (const re_match_context_t *mctx,
100 			     re_node_set *limits,
101 			     int dst_node, int dst_idx, int src_node,
102 			     int src_idx) internal_function;
103 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
104 					int boundaries, int subexp_idx,
105 					int from_node, int bkref_idx)
106      internal_function;
107 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
108 				      int limit, int subexp_idx,
109 				      int node, int str_idx,
110 				      int bkref_idx) internal_function;
111 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
112 					  re_node_set *dest_nodes,
113 					  const re_node_set *candidates,
114 					  re_node_set *limits,
115 					  struct re_backref_cache_entry *bkref_ents,
116 					  int str_idx) internal_function;
117 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
118 					re_sift_context_t *sctx,
119 					int str_idx, const re_node_set *candidates)
120      internal_function;
121 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
122 					re_dfastate_t **dst,
123 					re_dfastate_t **src, int num)
124      internal_function;
125 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
126 					 re_match_context_t *mctx) internal_function;
127 static re_dfastate_t *transit_state (reg_errcode_t *err,
128 				     re_match_context_t *mctx,
129 				     re_dfastate_t *state) internal_function;
130 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
131 					    re_match_context_t *mctx,
132 					    re_dfastate_t *next_state)
133      internal_function;
134 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
135 						re_node_set *cur_nodes,
136 						int str_idx) internal_function;
137 #if 0
138 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
139 					re_match_context_t *mctx,
140 					re_dfastate_t *pstate)
141      internal_function;
142 #endif
143 #ifdef RE_ENABLE_I18N
144 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
145 				       re_dfastate_t *pstate)
146      internal_function;
147 #endif /* RE_ENABLE_I18N */
148 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
149 					  const re_node_set *nodes)
150      internal_function;
151 static reg_errcode_t get_subexp (re_match_context_t *mctx,
152 				 int bkref_node, int bkref_str_idx)
153      internal_function;
154 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
155 				     const re_sub_match_top_t *sub_top,
156 				     re_sub_match_last_t *sub_last,
157 				     int bkref_node, int bkref_str)
158      internal_function;
159 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
160 			     int subexp_idx, int type) internal_function;
161 static reg_errcode_t check_arrival (re_match_context_t *mctx,
162 				    state_array_t *path, int top_node,
163 				    int top_str, int last_node, int last_str,
164 				    int type) internal_function;
165 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
166 						   int str_idx,
167 						   re_node_set *cur_nodes,
168 						   re_node_set *next_nodes)
169      internal_function;
170 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
171 					       re_node_set *cur_nodes,
172 					       int ex_subexp, int type)
173      internal_function;
174 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
175 						   re_node_set *dst_nodes,
176 						   int target, int ex_subexp,
177 						   int type) internal_function;
178 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
179 					 re_node_set *cur_nodes, int cur_str,
180 					 int subexp_num, int type)
181      internal_function;
182 static int build_trtable (const re_dfa_t *dfa,
183 			  re_dfastate_t *state) internal_function;
184 #ifdef RE_ENABLE_I18N
185 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
186 				    const re_string_t *input, int idx)
187      internal_function;
188 # ifdef _LIBC
189 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
190 						   size_t name_len)
191      internal_function;
192 # endif /* _LIBC */
193 #endif /* RE_ENABLE_I18N */
194 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
195 				       const re_dfastate_t *state,
196 				       re_node_set *states_node,
197 				       bitset_t *states_ch) internal_function;
198 static int check_node_accept (const re_match_context_t *mctx,
199 			      const re_token_t *node, int idx)
200      internal_function;
201 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
202      internal_function;
203 
204 /* Entry point for POSIX code.  */
205 
206 /* regexec searches for a given pattern, specified by PREG, in the
207    string STRING.
208 
209    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
210    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
211    least NMATCH elements, and we set them to the offsets of the
212    corresponding matched substrings.
213 
214    EFLAGS specifies `execution flags' which affect matching: if
215    REG_NOTBOL is set, then ^ does not match at the beginning of the
216    string; if REG_NOTEOL is set, then $ does not match at the end.
217 
218    We return 0 if we find a match and REG_NOMATCH if not.  */
219 
220 int
221 regexec (preg, string, nmatch, pmatch, eflags)
222     const regex_t *__restrict preg;
223     const char *__restrict string;
224     size_t nmatch;
225     regmatch_t pmatch[];
226     int eflags;
227 {
228   reg_errcode_t err;
229   int start, length;
230   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
231 
232   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
233     return REG_BADPAT;
234 
235   if (eflags & REG_STARTEND)
236     {
237       start = pmatch[0].rm_so;
238       length = pmatch[0].rm_eo;
239     }
240   else
241     {
242       start = 0;
243       length = strlen (string);
244     }
245 
246   __libc_lock_lock (dfa->lock);
247   if (preg->no_sub)
248     err = re_search_internal (preg, string, length, start, length - start,
249 			      length, 0, NULL, eflags);
250   else
251     err = re_search_internal (preg, string, length, start, length - start,
252 			      length, nmatch, pmatch, eflags);
253   __libc_lock_unlock (dfa->lock);
254   return err != REG_NOERROR;
255 }
256 
257 #ifdef _LIBC
258 # include <shlib-compat.h>
259 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
260 
261 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
262 __typeof__ (__regexec) __compat_regexec;
263 
264 int
265 attribute_compat_text_section
266 __compat_regexec (const regex_t *__restrict preg,
267 		  const char *__restrict string, size_t nmatch,
268 		  regmatch_t pmatch[], int eflags)
269 {
270   return regexec (preg, string, nmatch, pmatch,
271 		  eflags & (REG_NOTBOL | REG_NOTEOL));
272 }
273 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
274 # endif
275 #endif
276 
277 /* Entry points for GNU code.  */
278 
279 /* re_match, re_search, re_match_2, re_search_2
280 
281    The former two functions operate on STRING with length LENGTH,
282    while the later two operate on concatenation of STRING1 and STRING2
283    with lengths LENGTH1 and LENGTH2, respectively.
284 
285    re_match() matches the compiled pattern in BUFP against the string,
286    starting at index START.
287 
288    re_search() first tries matching at index START, then it tries to match
289    starting from index START + 1, and so on.  The last start position tried
290    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
291    way as re_match().)
292 
293    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
294    the first STOP characters of the concatenation of the strings should be
295    concerned.
296 
297    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
298    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
299    computed relative to the concatenation, not relative to the individual
300    strings.)
301 
302    On success, re_match* functions return the length of the match, re_search*
303    return the position of the start of the match.  Return value -1 means no
304    match was found and -2 indicates an internal error.  */
305 
306 int
307 re_match (bufp, string, length, start, regs)
308     struct re_pattern_buffer *bufp;
309     const char *string;
310     int length, start;
311     struct re_registers *regs;
312 {
313   return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
314 }
315 #ifdef _LIBC
316 weak_alias (__re_match, re_match)
317 #endif
318 
319 int
320 re_search (bufp, string, length, start, range, regs)
321     struct re_pattern_buffer *bufp;
322     const char *string;
323     int length, start, range;
324     struct re_registers *regs;
325 {
326   return re_search_stub (bufp, string, length, start, range, length, regs, 0);
327 }
328 #ifdef _LIBC
329 weak_alias (__re_search, re_search)
330 #endif
331 
332 int
333 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
334     struct re_pattern_buffer *bufp;
335     const char *string1, *string2;
336     int length1, length2, start, stop;
337     struct re_registers *regs;
338 {
339   return re_search_2_stub (bufp, string1, length1, string2, length2,
340 			   start, 0, regs, stop, 1);
341 }
342 #ifdef _LIBC
343 weak_alias (__re_match_2, re_match_2)
344 #endif
345 
346 int
347 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
348     struct re_pattern_buffer *bufp;
349     const char *string1, *string2;
350     int length1, length2, start, range, stop;
351     struct re_registers *regs;
352 {
353   return re_search_2_stub (bufp, string1, length1, string2, length2,
354 			   start, range, regs, stop, 0);
355 }
356 #ifdef _LIBC
357 weak_alias (__re_search_2, re_search_2)
358 #endif
359 
360 static int
361 re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
362 		  stop, ret_len)
363     struct re_pattern_buffer *bufp;
364     const char *string1, *string2;
365     int length1, length2, start, range, stop, ret_len;
366     struct re_registers *regs;
367 {
368   const char *str;
369   int rval;
370   int len = length1 + length2;
371   int free_str = 0;
372 
373   if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
374     return -2;
375 
376   /* Concatenate the strings.  */
377   if (length2 > 0)
378     if (length1 > 0)
379       {
380 	char *s = re_malloc (char, len);
381 
382 	if (BE (s == NULL, 0))
383 	  return -2;
384 #ifdef _LIBC
385 	memcpy (__mempcpy (s, string1, length1), string2, length2);
386 #else
387 	memcpy (s, string1, length1);
388 	memcpy (s + length1, string2, length2);
389 #endif
390 	str = s;
391 	free_str = 1;
392       }
393     else
394       str = string2;
395   else
396     str = string1;
397 
398   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
399 			 ret_len);
400   if (free_str)
401     re_free ((char *) str);
402   return rval;
403 }
404 
405 /* The parameters have the same meaning as those of re_search.
406    Additional parameters:
407    If RET_LEN is nonzero the length of the match is returned (re_match style);
408    otherwise the position of the match is returned.  */
409 
410 static int
411 re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
412     struct re_pattern_buffer *bufp;
413     const char *string;
414     int length, start, range, stop, ret_len;
415     struct re_registers *regs;
416 {
417   reg_errcode_t result;
418   regmatch_t *pmatch;
419   int nregs, rval;
420   int eflags = 0;
421   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
422 
423   /* Check for out-of-range.  */
424   if (BE (start < 0 || start > length, 0))
425     return -1;
426   if (BE (start + range > length, 0))
427     range = length - start;
428   else if (BE (start + range < 0, 0))
429     range = -start;
430 
431   __libc_lock_lock (dfa->lock);
432 
433   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
434   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
435 
436   /* Compile fastmap if we haven't yet.  */
437   if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
438     re_compile_fastmap (bufp);
439 
440   if (BE (bufp->no_sub, 0))
441     regs = NULL;
442 
443   /* We need at least 1 register.  */
444   if (regs == NULL)
445     nregs = 1;
446   else if (BE (bufp->regs_allocated == REGS_FIXED &&
447 	       regs->num_regs < bufp->re_nsub + 1, 0))
448     {
449       nregs = regs->num_regs;
450       if (BE (nregs < 1, 0))
451 	{
452 	  /* Nothing can be copied to regs.  */
453 	  regs = NULL;
454 	  nregs = 1;
455 	}
456     }
457   else
458     nregs = bufp->re_nsub + 1;
459   pmatch = re_malloc (regmatch_t, nregs);
460   if (BE (pmatch == NULL, 0))
461     {
462       rval = -2;
463       goto out;
464     }
465 
466   result = re_search_internal (bufp, string, length, start, range, stop,
467 			       nregs, pmatch, eflags);
468 
469   rval = 0;
470 
471   /* I hope we needn't fill ther regs with -1's when no match was found.  */
472   if (result != REG_NOERROR)
473     rval = -1;
474   else if (regs != NULL)
475     {
476       /* If caller wants register contents data back, copy them.  */
477       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
478 					   bufp->regs_allocated);
479       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
480 	rval = -2;
481     }
482 
483   if (BE (rval == 0, 1))
484     {
485       if (ret_len)
486 	{
487 	  assert (pmatch[0].rm_so == start);
488 	  rval = pmatch[0].rm_eo - start;
489 	}
490       else
491 	rval = pmatch[0].rm_so;
492     }
493   re_free (pmatch);
494  out:
495   __libc_lock_unlock (dfa->lock);
496   return rval;
497 }
498 
499 static unsigned
500 re_copy_regs (regs, pmatch, nregs, regs_allocated)
501     struct re_registers *regs;
502     regmatch_t *pmatch;
503     int nregs, regs_allocated;
504 {
505   int rval = REGS_REALLOCATE;
506   int i;
507   int need_regs = nregs + 1;
508   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
509      uses.  */
510 
511   /* Have the register data arrays been allocated?  */
512   if (regs_allocated == REGS_UNALLOCATED)
513     { /* No.  So allocate them with malloc.  */
514       regs->start = re_malloc (regoff_t, need_regs);
515       regs->end = re_malloc (regoff_t, need_regs);
516       if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
517 	return REGS_UNALLOCATED;
518       regs->num_regs = need_regs;
519     }
520   else if (regs_allocated == REGS_REALLOCATE)
521     { /* Yes.  If we need more elements than were already
522 	 allocated, reallocate them.  If we need fewer, just
523 	 leave it alone.  */
524       if (BE (need_regs > regs->num_regs, 0))
525 	{
526 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
527 	  regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
528 	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
529 	    return REGS_UNALLOCATED;
530 	  regs->start = new_start;
531 	  regs->end = new_end;
532 	  regs->num_regs = need_regs;
533 	}
534     }
535   else
536     {
537       assert (regs_allocated == REGS_FIXED);
538       /* This function may not be called with REGS_FIXED and nregs too big.  */
539       assert (regs->num_regs >= nregs);
540       rval = REGS_FIXED;
541     }
542 
543   /* Copy the regs.  */
544   for (i = 0; i < nregs; ++i)
545     {
546       regs->start[i] = pmatch[i].rm_so;
547       regs->end[i] = pmatch[i].rm_eo;
548     }
549   for ( ; i < regs->num_regs; ++i)
550     regs->start[i] = regs->end[i] = -1;
551 
552   return rval;
553 }
554 
555 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
556    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
557    this memory for recording register information.  STARTS and ENDS
558    must be allocated using the malloc library routine, and must each
559    be at least NUM_REGS * sizeof (regoff_t) bytes long.
560 
561    If NUM_REGS == 0, then subsequent matches should allocate their own
562    register data.
563 
564    Unless this function is called, the first search or match using
565    PATTERN_BUFFER will allocate its own register data, without
566    freeing the old data.  */
567 
568 void
569 re_set_registers (bufp, regs, num_regs, starts, ends)
570     struct re_pattern_buffer *bufp;
571     struct re_registers *regs;
572     unsigned num_regs;
573     regoff_t *starts, *ends;
574 {
575   if (num_regs)
576     {
577       bufp->regs_allocated = REGS_REALLOCATE;
578       regs->num_regs = num_regs;
579       regs->start = starts;
580       regs->end = ends;
581     }
582   else
583     {
584       bufp->regs_allocated = REGS_UNALLOCATED;
585       regs->num_regs = 0;
586       regs->start = regs->end = (regoff_t *) 0;
587     }
588 }
589 #ifdef _LIBC
590 weak_alias (__re_set_registers, re_set_registers)
591 #endif
592 
593 /* Entry points compatible with 4.2 BSD regex library.  We don't define
594    them unless specifically requested.  */
595 
596 #if defined _REGEX_RE_COMP || defined _LIBC
597 int
598 # ifdef _LIBC
599 weak_function
600 # endif
601 re_exec (s)
602      const char *s;
603 {
604   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
605 }
606 #endif /* _REGEX_RE_COMP */
607 
608 /* Internal entry point.  */
609 
610 /* Searches for a compiled pattern PREG in the string STRING, whose
611    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
612    mingings with regexec.  START, and RANGE have the same meanings
613    with re_search.
614    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
615    otherwise return the error code.
616    Note: We assume front end functions already check ranges.
617    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
618 
619 static reg_errcode_t
620 re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
621 		    eflags)
622     const regex_t *preg;
623     const char *string;
624     int length, start, range, stop, eflags;
625     size_t nmatch;
626     regmatch_t pmatch[];
627 {
628   reg_errcode_t err;
629   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
630   int left_lim, right_lim, incr;
631   int fl_longest_match, match_first, match_kind, match_last = -1;
632   int extra_nmatch;
633   int sb, ch;
634 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
635   re_match_context_t mctx = { .dfa = dfa };
636 #else
637   re_match_context_t mctx;
638 #endif
639   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
640 		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
641   RE_TRANSLATE_TYPE t = preg->translate;
642 
643 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
644   memset (&mctx, '\0', sizeof (re_match_context_t));
645   mctx.dfa = dfa;
646 #endif
647 
648   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
649   nmatch -= extra_nmatch;
650 
651   /* Check if the DFA haven't been compiled.  */
652   if (BE (preg->used == 0 || dfa->init_state == NULL
653 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
654 	  || dfa->init_state_begbuf == NULL, 0))
655     return REG_NOMATCH;
656 
657 #ifdef DEBUG
658   /* We assume front-end functions already check them.  */
659   assert (start + range >= 0 && start + range <= length);
660 #endif
661 
662   /* If initial states with non-begbuf contexts have no elements,
663      the regex must be anchored.  If preg->newline_anchor is set,
664      we'll never use init_state_nl, so do not check it.  */
665   if (dfa->init_state->nodes.nelem == 0
666       && dfa->init_state_word->nodes.nelem == 0
667       && (dfa->init_state_nl->nodes.nelem == 0
668 	  || !preg->newline_anchor))
669     {
670       if (start != 0 && start + range != 0)
671         return REG_NOMATCH;
672       start = range = 0;
673     }
674 
675   /* We must check the longest matching, if nmatch > 0.  */
676   fl_longest_match = (nmatch != 0 || dfa->nbackref);
677 
678   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
679 			    preg->translate, preg->syntax & RE_ICASE, dfa);
680   if (BE (err != REG_NOERROR, 0))
681     goto free_return;
682   mctx.input.stop = stop;
683   mctx.input.raw_stop = stop;
684   mctx.input.newline_anchor = preg->newline_anchor;
685 
686   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
687   if (BE (err != REG_NOERROR, 0))
688     goto free_return;
689 
690   /* We will log all the DFA states through which the dfa pass,
691      if nmatch > 1, or this dfa has "multibyte node", which is a
692      back-reference or a node which can accept multibyte character or
693      multi character collating element.  */
694   if (nmatch > 1 || dfa->has_mb_node)
695     {
696       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
697       if (BE (mctx.state_log == NULL, 0))
698 	{
699 	  err = REG_ESPACE;
700 	  goto free_return;
701 	}
702     }
703   else
704     mctx.state_log = NULL;
705 
706   match_first = start;
707   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
708 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
709 
710   /* Check incrementally whether of not the input string match.  */
711   incr = (range < 0) ? -1 : 1;
712   left_lim = (range < 0) ? start + range : start;
713   right_lim = (range < 0) ? start : start + range;
714   sb = dfa->mb_cur_max == 1;
715   match_kind =
716     (fastmap
717      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
718 	| (range >= 0 ? 2 : 0)
719 	| (t != NULL ? 1 : 0))
720      : 8);
721 
722   for (;; match_first += incr)
723     {
724       err = REG_NOMATCH;
725       if (match_first < left_lim || right_lim < match_first)
726 	goto free_return;
727 
728       /* Advance as rapidly as possible through the string, until we
729 	 find a plausible place to start matching.  This may be done
730 	 with varying efficiency, so there are various possibilities:
731 	 only the most common of them are specialized, in order to
732 	 save on code size.  We use a switch statement for speed.  */
733       switch (match_kind)
734 	{
735 	case 8:
736 	  /* No fastmap.  */
737 	  break;
738 
739 	case 7:
740 	  /* Fastmap with single-byte translation, match forward.  */
741 	  while (BE (match_first < right_lim, 1)
742 		 && !fastmap[t[(unsigned char) string[match_first]]])
743 	    ++match_first;
744 	  goto forward_match_found_start_or_reached_end;
745 
746 	case 6:
747 	  /* Fastmap without translation, match forward.  */
748 	  while (BE (match_first < right_lim, 1)
749 		 && !fastmap[(unsigned char) string[match_first]])
750 	    ++match_first;
751 
752 	forward_match_found_start_or_reached_end:
753 	  if (BE (match_first == right_lim, 0))
754 	    {
755 	      ch = match_first >= length
756 		       ? 0 : (unsigned char) string[match_first];
757 	      if (!fastmap[t ? t[ch] : ch])
758 		goto free_return;
759 	    }
760 	  break;
761 
762 	case 4:
763 	case 5:
764 	  /* Fastmap without multi-byte translation, match backwards.  */
765 	  while (match_first >= left_lim)
766 	    {
767 	      ch = match_first >= length
768 		       ? 0 : (unsigned char) string[match_first];
769 	      if (fastmap[t ? t[ch] : ch])
770 		break;
771 	      --match_first;
772 	    }
773 	  if (match_first < left_lim)
774 	    goto free_return;
775 	  break;
776 
777 	default:
778 	  /* In this case, we can't determine easily the current byte,
779 	     since it might be a component byte of a multibyte
780 	     character.  Then we use the constructed buffer instead.  */
781 	  for (;;)
782 	    {
783 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
784 		 buffers.  */
785 	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
786 	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
787 		{
788 		  err = re_string_reconstruct (&mctx.input, match_first,
789 					       eflags);
790 		  if (BE (err != REG_NOERROR, 0))
791 		    goto free_return;
792 
793 		  offset = match_first - mctx.input.raw_mbs_idx;
794 		}
795 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
796 		 Note that MATCH_FIRST must not be smaller than 0.  */
797 	      ch = (match_first >= length
798 		    ? 0 : re_string_byte_at (&mctx.input, offset));
799 	      if (fastmap[ch])
800 		break;
801 	      match_first += incr;
802 	      if (match_first < left_lim || match_first > right_lim)
803 	        {
804 	          err = REG_NOMATCH;
805 	          goto free_return;
806 	        }
807 	    }
808 	  break;
809 	}
810 
811       /* Reconstruct the buffers so that the matcher can assume that
812 	 the matching starts from the beginning of the buffer.  */
813       err = re_string_reconstruct (&mctx.input, match_first, eflags);
814       if (BE (err != REG_NOERROR, 0))
815 	goto free_return;
816 
817 #ifdef RE_ENABLE_I18N
818      /* Don't consider this char as a possible match start if it part,
819 	yet isn't the head, of a multibyte character.  */
820       if (!sb && !re_string_first_byte (&mctx.input, 0))
821 	continue;
822 #endif
823 
824       /* It seems to be appropriate one, then use the matcher.  */
825       /* We assume that the matching starts from 0.  */
826       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
827       match_last = check_matching (&mctx, fl_longest_match,
828 				   range >= 0 ? &match_first : NULL);
829       if (match_last != -1)
830 	{
831 	  if (BE (match_last == -2, 0))
832 	    {
833 	      err = REG_ESPACE;
834 	      goto free_return;
835 	    }
836 	  else
837 	    {
838 	      mctx.match_last = match_last;
839 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
840 		{
841 		  re_dfastate_t *pstate = mctx.state_log[match_last];
842 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
843 							     match_last);
844 		}
845 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
846 		  || dfa->nbackref)
847 		{
848 		  err = prune_impossible_nodes (&mctx);
849 		  if (err == REG_NOERROR)
850 		    break;
851 		  if (BE (err != REG_NOMATCH, 0))
852 		    goto free_return;
853 		  match_last = -1;
854 		}
855 	      else
856 		break; /* We found a match.  */
857 	    }
858 	}
859 
860       match_ctx_clean (&mctx);
861     }
862 
863 #ifdef DEBUG
864   assert (match_last != -1);
865   assert (err == REG_NOERROR);
866 #endif
867 
868   /* Set pmatch[] if we need.  */
869   if (nmatch > 0)
870     {
871       int reg_idx;
872 
873       /* Initialize registers.  */
874       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
875 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
876 
877       /* Set the points where matching start/end.  */
878       pmatch[0].rm_so = 0;
879       pmatch[0].rm_eo = mctx.match_last;
880 
881       if (!preg->no_sub && nmatch > 1)
882 	{
883 	  err = set_regs (preg, &mctx, nmatch, pmatch,
884 			  dfa->has_plural_match && dfa->nbackref > 0);
885 	  if (BE (err != REG_NOERROR, 0))
886 	    goto free_return;
887 	}
888 
889       /* At last, add the offset to the each registers, since we slided
890 	 the buffers so that we could assume that the matching starts
891 	 from 0.  */
892       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
893 	if (pmatch[reg_idx].rm_so != -1)
894 	  {
895 #ifdef RE_ENABLE_I18N
896 	    if (BE (mctx.input.offsets_needed != 0, 0))
897 	      {
898 		pmatch[reg_idx].rm_so =
899 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
900 		   ? mctx.input.valid_raw_len
901 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
902 		pmatch[reg_idx].rm_eo =
903 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
904 		   ? mctx.input.valid_raw_len
905 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
906 	      }
907 #else
908 	    assert (mctx.input.offsets_needed == 0);
909 #endif
910 	    pmatch[reg_idx].rm_so += match_first;
911 	    pmatch[reg_idx].rm_eo += match_first;
912 	  }
913       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
914 	{
915 	  pmatch[nmatch + reg_idx].rm_so = -1;
916 	  pmatch[nmatch + reg_idx].rm_eo = -1;
917 	}
918 
919       if (dfa->subexp_map)
920         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
921           if (dfa->subexp_map[reg_idx] != reg_idx)
922             {
923               pmatch[reg_idx + 1].rm_so
924                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
925               pmatch[reg_idx + 1].rm_eo
926                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
927             }
928     }
929 
930  free_return:
931   re_free (mctx.state_log);
932   if (dfa->nbackref)
933     match_ctx_free (&mctx);
934   re_string_destruct (&mctx.input);
935   return err;
936 }
937 
938 static reg_errcode_t
939 prune_impossible_nodes (mctx)
940      re_match_context_t *mctx;
941 {
942   const re_dfa_t *const dfa = mctx->dfa;
943   int halt_node, match_last;
944   reg_errcode_t ret;
945   re_dfastate_t **sifted_states;
946   re_dfastate_t **lim_states = NULL;
947   re_sift_context_t sctx;
948 #ifdef DEBUG
949   assert (mctx->state_log != NULL);
950 #endif
951   match_last = mctx->match_last;
952   halt_node = mctx->last_node;
953   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
954   if (BE (sifted_states == NULL, 0))
955     {
956       ret = REG_ESPACE;
957       goto free_return;
958     }
959   if (dfa->nbackref)
960     {
961       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
962       if (BE (lim_states == NULL, 0))
963 	{
964 	  ret = REG_ESPACE;
965 	  goto free_return;
966 	}
967       while (1)
968 	{
969 	  memset (lim_states, '\0',
970 		  sizeof (re_dfastate_t *) * (match_last + 1));
971 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
972 			 match_last);
973 	  ret = sift_states_backward (mctx, &sctx);
974 	  re_node_set_free (&sctx.limits);
975 	  if (BE (ret != REG_NOERROR, 0))
976 	      goto free_return;
977 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
978 	    break;
979 	  do
980 	    {
981 	      --match_last;
982 	      if (match_last < 0)
983 		{
984 		  ret = REG_NOMATCH;
985 		  goto free_return;
986 		}
987 	    } while (mctx->state_log[match_last] == NULL
988 		     || !mctx->state_log[match_last]->halt);
989 	  halt_node = check_halt_state_context (mctx,
990 						mctx->state_log[match_last],
991 						match_last);
992 	}
993       ret = merge_state_array (dfa, sifted_states, lim_states,
994 			       match_last + 1);
995       re_free (lim_states);
996       lim_states = NULL;
997       if (BE (ret != REG_NOERROR, 0))
998 	goto free_return;
999     }
1000   else
1001     {
1002       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1003       ret = sift_states_backward (mctx, &sctx);
1004       re_node_set_free (&sctx.limits);
1005       if (BE (ret != REG_NOERROR, 0))
1006 	goto free_return;
1007       if (sifted_states[0] == NULL)
1008 	{
1009 	  ret = REG_NOMATCH;
1010 	  goto free_return;
1011 	}
1012     }
1013   re_free (mctx->state_log);
1014   mctx->state_log = sifted_states;
1015   sifted_states = NULL;
1016   mctx->last_node = halt_node;
1017   mctx->match_last = match_last;
1018   ret = REG_NOERROR;
1019  free_return:
1020   re_free (sifted_states);
1021   re_free (lim_states);
1022   return ret;
1023 }
1024 
1025 /* Acquire an initial state and return it.
1026    We must select appropriate initial state depending on the context,
1027    since initial states may have constraints like "\<", "^", etc..  */
1028 
1029 static inline re_dfastate_t *
1030 __attribute ((always_inline)) internal_function
1031 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1032 			    int idx)
1033 {
1034   const re_dfa_t *const dfa = mctx->dfa;
1035   if (dfa->init_state->has_constraint)
1036     {
1037       unsigned int context;
1038       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1039       if (IS_WORD_CONTEXT (context))
1040 	return dfa->init_state_word;
1041       else if (IS_ORDINARY_CONTEXT (context))
1042 	return dfa->init_state;
1043       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1044 	return dfa->init_state_begbuf;
1045       else if (IS_NEWLINE_CONTEXT (context))
1046 	return dfa->init_state_nl;
1047       else if (IS_BEGBUF_CONTEXT (context))
1048 	{
1049 	  /* It is relatively rare case, then calculate on demand.  */
1050 	  return re_acquire_state_context (err, dfa,
1051 					   dfa->init_state->entrance_nodes,
1052 					   context);
1053 	}
1054       else
1055 	/* Must not happen?  */
1056 	return dfa->init_state;
1057     }
1058   else
1059     return dfa->init_state;
1060 }
1061 
1062 /* Check whether the regular expression match input string INPUT or not,
1063    and return the index where the matching end, return -1 if not match,
1064    or return -2 in case of an error.
1065    FL_LONGEST_MATCH means we want the POSIX longest matching.
1066    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1067    next place where we may want to try matching.
1068    Note that the matcher assume that the maching starts from the current
1069    index of the buffer.  */
1070 
1071 static int
1072 internal_function
1073 check_matching (re_match_context_t *mctx, int fl_longest_match,
1074 		int *p_match_first)
1075 {
1076   const re_dfa_t *const dfa = mctx->dfa;
1077   reg_errcode_t err;
1078   int match = 0;
1079   int match_last = -1;
1080   int cur_str_idx = re_string_cur_idx (&mctx->input);
1081   re_dfastate_t *cur_state;
1082   int at_init_state = p_match_first != NULL;
1083   int next_start_idx = cur_str_idx;
1084 
1085   err = REG_NOERROR;
1086   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1087   /* An initial state must not be NULL (invalid).  */
1088   if (BE (cur_state == NULL, 0))
1089     {
1090       assert (err == REG_ESPACE);
1091       return -2;
1092     }
1093 
1094   if (mctx->state_log != NULL)
1095     {
1096       mctx->state_log[cur_str_idx] = cur_state;
1097 
1098       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1099 	 later.  E.g. Processing back references.  */
1100       if (BE (dfa->nbackref, 0))
1101 	{
1102 	  at_init_state = 0;
1103 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1104 	  if (BE (err != REG_NOERROR, 0))
1105 	    return err;
1106 
1107 	  if (cur_state->has_backref)
1108 	    {
1109 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1110 	      if (BE (err != REG_NOERROR, 0))
1111 	        return err;
1112 	    }
1113 	}
1114     }
1115 
1116   /* If the RE accepts NULL string.  */
1117   if (BE (cur_state->halt, 0))
1118     {
1119       if (!cur_state->has_constraint
1120 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1121 	{
1122 	  if (!fl_longest_match)
1123 	    return cur_str_idx;
1124 	  else
1125 	    {
1126 	      match_last = cur_str_idx;
1127 	      match = 1;
1128 	    }
1129 	}
1130     }
1131 
1132   while (!re_string_eoi (&mctx->input))
1133     {
1134       re_dfastate_t *old_state = cur_state;
1135       int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1136 
1137       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1138           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1139               && mctx->input.valid_len < mctx->input.len))
1140         {
1141           err = extend_buffers (mctx);
1142           if (BE (err != REG_NOERROR, 0))
1143 	    {
1144 	      assert (err == REG_ESPACE);
1145 	      return -2;
1146 	    }
1147         }
1148 
1149       cur_state = transit_state (&err, mctx, cur_state);
1150       if (mctx->state_log != NULL)
1151 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1152 
1153       if (cur_state == NULL)
1154 	{
1155 	  /* Reached the invalid state or an error.  Try to recover a valid
1156 	     state using the state log, if available and if we have not
1157 	     already found a valid (even if not the longest) match.  */
1158 	  if (BE (err != REG_NOERROR, 0))
1159 	    return -2;
1160 
1161 	  if (mctx->state_log == NULL
1162 	      || (match && !fl_longest_match)
1163 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1164 	    break;
1165 	}
1166 
1167       if (BE (at_init_state, 0))
1168 	{
1169 	  if (old_state == cur_state)
1170 	    next_start_idx = next_char_idx;
1171 	  else
1172 	    at_init_state = 0;
1173 	}
1174 
1175       if (cur_state->halt)
1176 	{
1177 	  /* Reached a halt state.
1178 	     Check the halt state can satisfy the current context.  */
1179 	  if (!cur_state->has_constraint
1180 	      || check_halt_state_context (mctx, cur_state,
1181 					   re_string_cur_idx (&mctx->input)))
1182 	    {
1183 	      /* We found an appropriate halt state.  */
1184 	      match_last = re_string_cur_idx (&mctx->input);
1185 	      match = 1;
1186 
1187 	      /* We found a match, do not modify match_first below.  */
1188 	      p_match_first = NULL;
1189 	      if (!fl_longest_match)
1190 		break;
1191 	    }
1192 	}
1193     }
1194 
1195   if (p_match_first)
1196     *p_match_first += next_start_idx;
1197 
1198   return match_last;
1199 }
1200 
1201 /* Check NODE match the current context.  */
1202 
1203 static int
1204 internal_function
1205 check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1206 {
1207   re_token_type_t type = dfa->nodes[node].type;
1208   unsigned int constraint = dfa->nodes[node].constraint;
1209   if (type != END_OF_RE)
1210     return 0;
1211   if (!constraint)
1212     return 1;
1213   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1214     return 0;
1215   return 1;
1216 }
1217 
1218 /* Check the halt state STATE match the current context.
1219    Return 0 if not match, if the node, STATE has, is a halt node and
1220    match the context, return the node.  */
1221 
1222 static int
1223 internal_function
1224 check_halt_state_context (const re_match_context_t *mctx,
1225 			  const re_dfastate_t *state, int idx)
1226 {
1227   int i;
1228   unsigned int context;
1229 #ifdef DEBUG
1230   assert (state->halt);
1231 #endif
1232   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1233   for (i = 0; i < state->nodes.nelem; ++i)
1234     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1235       return state->nodes.elems[i];
1236   return 0;
1237 }
1238 
1239 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1240    corresponding to the DFA).
1241    Return the destination node, and update EPS_VIA_NODES, return -1 in case
1242    of errors.  */
1243 
1244 static int
1245 internal_function
1246 proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1247 		   int *pidx, int node, re_node_set *eps_via_nodes,
1248 		   struct re_fail_stack_t *fs)
1249 {
1250   const re_dfa_t *const dfa = mctx->dfa;
1251   int i, err;
1252   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1253     {
1254       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1255       re_node_set *edests = &dfa->edests[node];
1256       int dest_node;
1257       err = re_node_set_insert (eps_via_nodes, node);
1258       if (BE (err < 0, 0))
1259 	return -2;
1260       /* Pick up a valid destination, or return -1 if none is found.  */
1261       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1262 	{
1263 	  int candidate = edests->elems[i];
1264 	  if (!re_node_set_contains (cur_nodes, candidate))
1265 	    continue;
1266           if (dest_node == -1)
1267 	    dest_node = candidate;
1268 
1269           else
1270 	    {
1271 	      /* In order to avoid infinite loop like "(a*)*", return the second
1272 	         epsilon-transition if the first was already considered.  */
1273 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1274 	        return candidate;
1275 
1276 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1277 	      else if (fs != NULL
1278 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1279 				           eps_via_nodes))
1280 		return -2;
1281 
1282 	      /* We know we are going to exit.  */
1283 	      break;
1284 	    }
1285 	}
1286       return dest_node;
1287     }
1288   else
1289     {
1290       int naccepted = 0;
1291       re_token_type_t type = dfa->nodes[node].type;
1292 
1293 #ifdef RE_ENABLE_I18N
1294       if (dfa->nodes[node].accept_mb)
1295 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1296       else
1297 #endif /* RE_ENABLE_I18N */
1298       if (type == OP_BACK_REF)
1299 	{
1300 	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1301 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1302 	  if (fs != NULL)
1303 	    {
1304 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1305 		return -1;
1306 	      else if (naccepted)
1307 		{
1308 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1309 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1310 			      naccepted) != 0)
1311 		    return -1;
1312 		}
1313 	    }
1314 
1315 	  if (naccepted == 0)
1316 	    {
1317 	      int dest_node;
1318 	      err = re_node_set_insert (eps_via_nodes, node);
1319 	      if (BE (err < 0, 0))
1320 		return -2;
1321 	      dest_node = dfa->edests[node].elems[0];
1322 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1323 					dest_node))
1324 		return dest_node;
1325 	    }
1326 	}
1327 
1328       if (naccepted != 0
1329 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1330 	{
1331 	  int dest_node = dfa->nexts[node];
1332 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1333 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1334 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1335 					       dest_node)))
1336 	    return -1;
1337 	  re_node_set_empty (eps_via_nodes);
1338 	  return dest_node;
1339 	}
1340     }
1341   return -1;
1342 }
1343 
1344 static reg_errcode_t
1345 internal_function
1346 push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1347 		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1348 {
1349   reg_errcode_t err;
1350   int num = fs->num++;
1351   if (fs->num == fs->alloc)
1352     {
1353       struct re_fail_stack_ent_t *new_array;
1354       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1355 				       * fs->alloc * 2));
1356       if (new_array == NULL)
1357 	return REG_ESPACE;
1358       fs->alloc *= 2;
1359       fs->stack = new_array;
1360     }
1361   fs->stack[num].idx = str_idx;
1362   fs->stack[num].node = dest_node;
1363   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1364   if (fs->stack[num].regs == NULL)
1365     return REG_ESPACE;
1366   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1367   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1368   return err;
1369 }
1370 
1371 static int
1372 internal_function
1373 pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1374 		regmatch_t *regs, re_node_set *eps_via_nodes)
1375 {
1376   int num = --fs->num;
1377   assert (num >= 0);
1378   *pidx = fs->stack[num].idx;
1379   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1380   re_node_set_free (eps_via_nodes);
1381   re_free (fs->stack[num].regs);
1382   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1383   return fs->stack[num].node;
1384 }
1385 
1386 /* Set the positions where the subexpressions are starts/ends to registers
1387    PMATCH.
1388    Note: We assume that pmatch[0] is already set, and
1389    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1390 
1391 static reg_errcode_t
1392 internal_function
1393 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1394 	  regmatch_t *pmatch, int fl_backtrack)
1395 {
1396   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1397   int idx, cur_node;
1398   re_node_set eps_via_nodes;
1399   struct re_fail_stack_t *fs;
1400   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1401   regmatch_t *prev_idx_match;
1402   int prev_idx_match_malloced = 0;
1403 
1404 #ifdef DEBUG
1405   assert (nmatch > 1);
1406   assert (mctx->state_log != NULL);
1407 #endif
1408   if (fl_backtrack)
1409     {
1410       fs = &fs_body;
1411       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1412       if (fs->stack == NULL)
1413 	return REG_ESPACE;
1414     }
1415   else
1416     fs = NULL;
1417 
1418   cur_node = dfa->init_node;
1419   re_node_set_init_empty (&eps_via_nodes);
1420 
1421   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1422     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1423   else
1424     {
1425       prev_idx_match = re_malloc (regmatch_t, nmatch);
1426       if (prev_idx_match == NULL)
1427 	{
1428 	  free_fail_stack_return (fs);
1429 	  return REG_ESPACE;
1430 	}
1431       prev_idx_match_malloced = 1;
1432     }
1433   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1434 
1435   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1436     {
1437       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1438 
1439       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1440 	{
1441 	  int reg_idx;
1442 	  if (fs)
1443 	    {
1444 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1445 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1446 		  break;
1447 	      if (reg_idx == nmatch)
1448 		{
1449 		  re_node_set_free (&eps_via_nodes);
1450 		  if (prev_idx_match_malloced)
1451 		    re_free (prev_idx_match);
1452 		  return free_fail_stack_return (fs);
1453 		}
1454 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1455 					 &eps_via_nodes);
1456 	    }
1457 	  else
1458 	    {
1459 	      re_node_set_free (&eps_via_nodes);
1460 	      if (prev_idx_match_malloced)
1461 		re_free (prev_idx_match);
1462 	      return REG_NOERROR;
1463 	    }
1464 	}
1465 
1466       /* Proceed to next node.  */
1467       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1468 				    &eps_via_nodes, fs);
1469 
1470       if (BE (cur_node < 0, 0))
1471 	{
1472 	  if (BE (cur_node == -2, 0))
1473 	    {
1474 	      re_node_set_free (&eps_via_nodes);
1475 	      if (prev_idx_match_malloced)
1476 		re_free (prev_idx_match);
1477 	      free_fail_stack_return (fs);
1478 	      return REG_ESPACE;
1479 	    }
1480 	  if (fs)
1481 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1482 				       &eps_via_nodes);
1483 	  else
1484 	    {
1485 	      re_node_set_free (&eps_via_nodes);
1486 	      if (prev_idx_match_malloced)
1487 		re_free (prev_idx_match);
1488 	      return REG_NOMATCH;
1489 	    }
1490 	}
1491     }
1492   re_node_set_free (&eps_via_nodes);
1493   if (prev_idx_match_malloced)
1494     re_free (prev_idx_match);
1495   return free_fail_stack_return (fs);
1496 }
1497 
1498 static reg_errcode_t
1499 internal_function
1500 free_fail_stack_return (struct re_fail_stack_t *fs)
1501 {
1502   if (fs)
1503     {
1504       int fs_idx;
1505       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1506 	{
1507 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1508 	  re_free (fs->stack[fs_idx].regs);
1509 	}
1510       re_free (fs->stack);
1511     }
1512   return REG_NOERROR;
1513 }
1514 
1515 static void
1516 internal_function
1517 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1518 	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1519 {
1520   int type = dfa->nodes[cur_node].type;
1521   if (type == OP_OPEN_SUBEXP)
1522     {
1523       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1524 
1525       /* We are at the first node of this sub expression.  */
1526       if (reg_num < nmatch)
1527 	{
1528 	  pmatch[reg_num].rm_so = cur_idx;
1529 	  pmatch[reg_num].rm_eo = -1;
1530 	}
1531     }
1532   else if (type == OP_CLOSE_SUBEXP)
1533     {
1534       int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1535       if (reg_num < nmatch)
1536 	{
1537 	  /* We are at the last node of this sub expression.  */
1538 	  if (pmatch[reg_num].rm_so < cur_idx)
1539 	    {
1540 	      pmatch[reg_num].rm_eo = cur_idx;
1541 	      /* This is a non-empty match or we are not inside an optional
1542 		 subexpression.  Accept this right away.  */
1543 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1544 	    }
1545 	  else
1546 	    {
1547 	      if (dfa->nodes[cur_node].opt_subexp
1548 		  && prev_idx_match[reg_num].rm_so != -1)
1549 		/* We transited through an empty match for an optional
1550 		   subexpression, like (a?)*, and this is not the subexp's
1551 		   first match.  Copy back the old content of the registers
1552 		   so that matches of an inner subexpression are undone as
1553 		   well, like in ((a?))*.  */
1554 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1555 	      else
1556 		/* We completed a subexpression, but it may be part of
1557 		   an optional one, so do not update PREV_IDX_MATCH.  */
1558 		pmatch[reg_num].rm_eo = cur_idx;
1559 	    }
1560 	}
1561     }
1562 }
1563 
1564 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1565    and sift the nodes in each states according to the following rules.
1566    Updated state_log will be wrote to STATE_LOG.
1567 
1568    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1569      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1570 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1571 	the LAST_NODE, we throw away the node `a'.
1572      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1573 	string `s' and transit to `b':
1574 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1575 	   away the node `a'.
1576 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1577 	    thrown away, we throw away the node `a'.
1578      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1579 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1580 	   node `a'.
1581 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1582 	    we throw away the node `a'.  */
1583 
1584 #define STATE_NODE_CONTAINS(state,node) \
1585   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1586 
1587 static reg_errcode_t
1588 internal_function
1589 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1590 {
1591   reg_errcode_t err;
1592   int null_cnt = 0;
1593   int str_idx = sctx->last_str_idx;
1594   re_node_set cur_dest;
1595 
1596 #ifdef DEBUG
1597   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1598 #endif
1599 
1600   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1601      transit to the last_node and the last_node itself.  */
1602   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1603   if (BE (err != REG_NOERROR, 0))
1604     return err;
1605   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1606   if (BE (err != REG_NOERROR, 0))
1607     goto free_return;
1608 
1609   /* Then check each states in the state_log.  */
1610   while (str_idx > 0)
1611     {
1612       /* Update counters.  */
1613       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1614       if (null_cnt > mctx->max_mb_elem_len)
1615 	{
1616 	  memset (sctx->sifted_states, '\0',
1617 		  sizeof (re_dfastate_t *) * str_idx);
1618 	  re_node_set_free (&cur_dest);
1619 	  return REG_NOERROR;
1620 	}
1621       re_node_set_empty (&cur_dest);
1622       --str_idx;
1623 
1624       if (mctx->state_log[str_idx])
1625 	{
1626 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1627           if (BE (err != REG_NOERROR, 0))
1628 	    goto free_return;
1629 	}
1630 
1631       /* Add all the nodes which satisfy the following conditions:
1632 	 - It can epsilon transit to a node in CUR_DEST.
1633 	 - It is in CUR_SRC.
1634 	 And update state_log.  */
1635       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1636       if (BE (err != REG_NOERROR, 0))
1637 	goto free_return;
1638     }
1639   err = REG_NOERROR;
1640  free_return:
1641   re_node_set_free (&cur_dest);
1642   return err;
1643 }
1644 
1645 static reg_errcode_t
1646 internal_function
1647 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1648 		     int str_idx, re_node_set *cur_dest)
1649 {
1650   const re_dfa_t *const dfa = mctx->dfa;
1651   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1652   int i;
1653 
1654   /* Then build the next sifted state.
1655      We build the next sifted state on `cur_dest', and update
1656      `sifted_states[str_idx]' with `cur_dest'.
1657      Note:
1658      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1659      `cur_src' points the node_set of the old `state_log[str_idx]'
1660      (with the epsilon nodes pre-filtered out).  */
1661   for (i = 0; i < cur_src->nelem; i++)
1662     {
1663       int prev_node = cur_src->elems[i];
1664       int naccepted = 0;
1665       int ret;
1666 
1667 #ifdef DEBUG
1668       re_token_type_t type = dfa->nodes[prev_node].type;
1669       assert (!IS_EPSILON_NODE (type));
1670 #endif
1671 #ifdef RE_ENABLE_I18N
1672       /* If the node may accept `multi byte'.  */
1673       if (dfa->nodes[prev_node].accept_mb)
1674 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1675 					 str_idx, sctx->last_str_idx);
1676 #endif /* RE_ENABLE_I18N */
1677 
1678       /* We don't check backreferences here.
1679 	 See update_cur_sifted_state().  */
1680       if (!naccepted
1681 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1682 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1683 				  dfa->nexts[prev_node]))
1684 	naccepted = 1;
1685 
1686       if (naccepted == 0)
1687 	continue;
1688 
1689       if (sctx->limits.nelem)
1690 	{
1691 	  int to_idx = str_idx + naccepted;
1692 	  if (check_dst_limits (mctx, &sctx->limits,
1693 				dfa->nexts[prev_node], to_idx,
1694 				prev_node, str_idx))
1695 	    continue;
1696 	}
1697       ret = re_node_set_insert (cur_dest, prev_node);
1698       if (BE (ret == -1, 0))
1699 	return REG_ESPACE;
1700     }
1701 
1702   return REG_NOERROR;
1703 }
1704 
1705 /* Helper functions.  */
1706 
1707 static reg_errcode_t
1708 internal_function
1709 clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1710 {
1711   int top = mctx->state_log_top;
1712 
1713   if (next_state_log_idx >= mctx->input.bufs_len
1714       || (next_state_log_idx >= mctx->input.valid_len
1715 	  && mctx->input.valid_len < mctx->input.len))
1716     {
1717       reg_errcode_t err;
1718       err = extend_buffers (mctx);
1719       if (BE (err != REG_NOERROR, 0))
1720 	return err;
1721     }
1722 
1723   if (top < next_state_log_idx)
1724     {
1725       memset (mctx->state_log + top + 1, '\0',
1726 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1727       mctx->state_log_top = next_state_log_idx;
1728     }
1729   return REG_NOERROR;
1730 }
1731 
1732 static reg_errcode_t
1733 internal_function
1734 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1735 		   re_dfastate_t **src, int num)
1736 {
1737   int st_idx;
1738   reg_errcode_t err;
1739   for (st_idx = 0; st_idx < num; ++st_idx)
1740     {
1741       if (dst[st_idx] == NULL)
1742 	dst[st_idx] = src[st_idx];
1743       else if (src[st_idx] != NULL)
1744 	{
1745 	  re_node_set merged_set;
1746 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1747 					&src[st_idx]->nodes);
1748 	  if (BE (err != REG_NOERROR, 0))
1749 	    return err;
1750 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1751 	  re_node_set_free (&merged_set);
1752 	  if (BE (err != REG_NOERROR, 0))
1753 	    return err;
1754 	}
1755     }
1756   return REG_NOERROR;
1757 }
1758 
1759 static reg_errcode_t
1760 internal_function
1761 update_cur_sifted_state (const re_match_context_t *mctx,
1762 			 re_sift_context_t *sctx, int str_idx,
1763 			 re_node_set *dest_nodes)
1764 {
1765   const re_dfa_t *const dfa = mctx->dfa;
1766   reg_errcode_t err = REG_NOERROR;
1767   const re_node_set *candidates;
1768   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1769 		: &mctx->state_log[str_idx]->nodes);
1770 
1771   if (dest_nodes->nelem == 0)
1772     sctx->sifted_states[str_idx] = NULL;
1773   else
1774     {
1775       if (candidates)
1776 	{
1777 	  /* At first, add the nodes which can epsilon transit to a node in
1778 	     DEST_NODE.  */
1779 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1780 	  if (BE (err != REG_NOERROR, 0))
1781 	    return err;
1782 
1783 	  /* Then, check the limitations in the current sift_context.  */
1784 	  if (sctx->limits.nelem)
1785 	    {
1786 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1787 					 mctx->bkref_ents, str_idx);
1788 	      if (BE (err != REG_NOERROR, 0))
1789 		return err;
1790 	    }
1791 	}
1792 
1793       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1794       if (BE (err != REG_NOERROR, 0))
1795 	return err;
1796     }
1797 
1798   if (candidates && mctx->state_log[str_idx]->has_backref)
1799     {
1800       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1801       if (BE (err != REG_NOERROR, 0))
1802 	return err;
1803     }
1804   return REG_NOERROR;
1805 }
1806 
1807 static reg_errcode_t
1808 internal_function
1809 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1810 		       const re_node_set *candidates)
1811 {
1812   reg_errcode_t err = REG_NOERROR;
1813   int i;
1814 
1815   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1816   if (BE (err != REG_NOERROR, 0))
1817     return err;
1818 
1819   if (!state->inveclosure.alloc)
1820     {
1821       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1822       if (BE (err != REG_NOERROR, 0))
1823         return REG_ESPACE;
1824       for (i = 0; i < dest_nodes->nelem; i++)
1825         re_node_set_merge (&state->inveclosure,
1826 			   dfa->inveclosures + dest_nodes->elems[i]);
1827     }
1828   return re_node_set_add_intersect (dest_nodes, candidates,
1829 				    &state->inveclosure);
1830 }
1831 
1832 static reg_errcode_t
1833 internal_function
1834 sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1835 		       const re_node_set *candidates)
1836 {
1837     int ecl_idx;
1838     reg_errcode_t err;
1839     re_node_set *inv_eclosure = dfa->inveclosures + node;
1840     re_node_set except_nodes;
1841     re_node_set_init_empty (&except_nodes);
1842     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1843       {
1844 	int cur_node = inv_eclosure->elems[ecl_idx];
1845 	if (cur_node == node)
1846 	  continue;
1847 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1848 	  {
1849 	    int edst1 = dfa->edests[cur_node].elems[0];
1850 	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1851 			 ? dfa->edests[cur_node].elems[1] : -1);
1852 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1853 		 && re_node_set_contains (dest_nodes, edst1))
1854 		|| (edst2 > 0
1855 		    && !re_node_set_contains (inv_eclosure, edst2)
1856 		    && re_node_set_contains (dest_nodes, edst2)))
1857 	      {
1858 		err = re_node_set_add_intersect (&except_nodes, candidates,
1859 						 dfa->inveclosures + cur_node);
1860 		if (BE (err != REG_NOERROR, 0))
1861 		  {
1862 		    re_node_set_free (&except_nodes);
1863 		    return err;
1864 		  }
1865 	      }
1866 	  }
1867       }
1868     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1869       {
1870 	int cur_node = inv_eclosure->elems[ecl_idx];
1871 	if (!re_node_set_contains (&except_nodes, cur_node))
1872 	  {
1873 	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1874 	    re_node_set_remove_at (dest_nodes, idx);
1875 	  }
1876       }
1877     re_node_set_free (&except_nodes);
1878     return REG_NOERROR;
1879 }
1880 
1881 static int
1882 internal_function
1883 check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1884 		  int dst_node, int dst_idx, int src_node, int src_idx)
1885 {
1886   const re_dfa_t *const dfa = mctx->dfa;
1887   int lim_idx, src_pos, dst_pos;
1888 
1889   int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1890   int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1891   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1892     {
1893       int subexp_idx;
1894       struct re_backref_cache_entry *ent;
1895       ent = mctx->bkref_ents + limits->elems[lim_idx];
1896       subexp_idx = dfa->nodes[ent->node].opr.idx;
1897 
1898       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1899 					   subexp_idx, dst_node, dst_idx,
1900 					   dst_bkref_idx);
1901       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1902 					   subexp_idx, src_node, src_idx,
1903 					   src_bkref_idx);
1904 
1905       /* In case of:
1906 	 <src> <dst> ( <subexp> )
1907 	 ( <subexp> ) <src> <dst>
1908 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1909       if (src_pos == dst_pos)
1910 	continue; /* This is unrelated limitation.  */
1911       else
1912 	return 1;
1913     }
1914   return 0;
1915 }
1916 
1917 static int
1918 internal_function
1919 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1920 			     int subexp_idx, int from_node, int bkref_idx)
1921 {
1922   const re_dfa_t *const dfa = mctx->dfa;
1923   const re_node_set *eclosures = dfa->eclosures + from_node;
1924   int node_idx;
1925 
1926   /* Else, we are on the boundary: examine the nodes on the epsilon
1927      closure.  */
1928   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1929     {
1930       int node = eclosures->elems[node_idx];
1931       switch (dfa->nodes[node].type)
1932 	{
1933 	case OP_BACK_REF:
1934 	  if (bkref_idx != -1)
1935 	    {
1936 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1937 	      do
1938 	        {
1939 		  int dst, cpos;
1940 
1941 		  if (ent->node != node)
1942 		    continue;
1943 
1944 		  if (subexp_idx < BITSET_WORD_BITS
1945 		      && !(ent->eps_reachable_subexps_map
1946 			   & ((bitset_word_t) 1 << subexp_idx)))
1947 		    continue;
1948 
1949 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1950 		     OP_CLOSE_SUBEXP cases below.  But, if the
1951 		     destination node is the same node as the source
1952 		     node, don't recurse because it would cause an
1953 		     infinite loop: a regex that exhibits this behavior
1954 		     is ()\1*\1*  */
1955 		  dst = dfa->edests[node].elems[0];
1956 		  if (dst == from_node)
1957 		    {
1958 		      if (boundaries & 1)
1959 		        return -1;
1960 		      else /* if (boundaries & 2) */
1961 		        return 0;
1962 		    }
1963 
1964 		  cpos =
1965 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1966 						 dst, bkref_idx);
1967 		  if (cpos == -1 /* && (boundaries & 1) */)
1968 		    return -1;
1969 		  if (cpos == 0 && (boundaries & 2))
1970 		    return 0;
1971 
1972 		  if (subexp_idx < BITSET_WORD_BITS)
1973 		    ent->eps_reachable_subexps_map
1974 		      &= ~((bitset_word_t) 1 << subexp_idx);
1975 	        }
1976 	      while (ent++->more);
1977 	    }
1978 	  break;
1979 
1980 	case OP_OPEN_SUBEXP:
1981 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1982 	    return -1;
1983 	  break;
1984 
1985 	case OP_CLOSE_SUBEXP:
1986 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1987 	    return 0;
1988 	  break;
1989 
1990 	default:
1991 	    break;
1992 	}
1993     }
1994 
1995   return (boundaries & 2) ? 1 : 0;
1996 }
1997 
1998 static int
1999 internal_function
2000 check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2001 			   int subexp_idx, int from_node, int str_idx,
2002 			   int bkref_idx)
2003 {
2004   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2005   int boundaries;
2006 
2007   /* If we are outside the range of the subexpression, return -1 or 1.  */
2008   if (str_idx < lim->subexp_from)
2009     return -1;
2010 
2011   if (lim->subexp_to < str_idx)
2012     return 1;
2013 
2014   /* If we are within the subexpression, return 0.  */
2015   boundaries = (str_idx == lim->subexp_from);
2016   boundaries |= (str_idx == lim->subexp_to) << 1;
2017   if (boundaries == 0)
2018     return 0;
2019 
2020   /* Else, examine epsilon closure.  */
2021   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2022 				      from_node, bkref_idx);
2023 }
2024 
2025 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2026    which are against limitations from DEST_NODES. */
2027 
2028 static reg_errcode_t
2029 internal_function
2030 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2031 		     const re_node_set *candidates, re_node_set *limits,
2032 		     struct re_backref_cache_entry *bkref_ents, int str_idx)
2033 {
2034   reg_errcode_t err;
2035   int node_idx, lim_idx;
2036 
2037   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2038     {
2039       int subexp_idx;
2040       struct re_backref_cache_entry *ent;
2041       ent = bkref_ents + limits->elems[lim_idx];
2042 
2043       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2044 	continue; /* This is unrelated limitation.  */
2045 
2046       subexp_idx = dfa->nodes[ent->node].opr.idx;
2047       if (ent->subexp_to == str_idx)
2048 	{
2049 	  int ops_node = -1;
2050 	  int cls_node = -1;
2051 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2052 	    {
2053 	      int node = dest_nodes->elems[node_idx];
2054 	      re_token_type_t type = dfa->nodes[node].type;
2055 	      if (type == OP_OPEN_SUBEXP
2056 		  && subexp_idx == dfa->nodes[node].opr.idx)
2057 		ops_node = node;
2058 	      else if (type == OP_CLOSE_SUBEXP
2059 		       && subexp_idx == dfa->nodes[node].opr.idx)
2060 		cls_node = node;
2061 	    }
2062 
2063 	  /* Check the limitation of the open subexpression.  */
2064 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2065 	  if (ops_node >= 0)
2066 	    {
2067 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2068 					   candidates);
2069 	      if (BE (err != REG_NOERROR, 0))
2070 		return err;
2071 	    }
2072 
2073 	  /* Check the limitation of the close subexpression.  */
2074 	  if (cls_node >= 0)
2075 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2076 	      {
2077 		int node = dest_nodes->elems[node_idx];
2078 		if (!re_node_set_contains (dfa->inveclosures + node,
2079 					   cls_node)
2080 		    && !re_node_set_contains (dfa->eclosures + node,
2081 					      cls_node))
2082 		  {
2083 		    /* It is against this limitation.
2084 		       Remove it form the current sifted state.  */
2085 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2086 						 candidates);
2087 		    if (BE (err != REG_NOERROR, 0))
2088 		      return err;
2089 		    --node_idx;
2090 		  }
2091 	      }
2092 	}
2093       else /* (ent->subexp_to != str_idx)  */
2094 	{
2095 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2096 	    {
2097 	      int node = dest_nodes->elems[node_idx];
2098 	      re_token_type_t type = dfa->nodes[node].type;
2099 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2100 		{
2101 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2102 		    continue;
2103 		  /* It is against this limitation.
2104 		     Remove it form the current sifted state.  */
2105 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2106 					       candidates);
2107 		  if (BE (err != REG_NOERROR, 0))
2108 		    return err;
2109 		}
2110 	    }
2111 	}
2112     }
2113   return REG_NOERROR;
2114 }
2115 
2116 static reg_errcode_t
2117 internal_function
2118 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2119 		   int str_idx, const re_node_set *candidates)
2120 {
2121   const re_dfa_t *const dfa = mctx->dfa;
2122   reg_errcode_t err;
2123   int node_idx, node;
2124   re_sift_context_t local_sctx;
2125   int first_idx = search_cur_bkref_entry (mctx, str_idx);
2126 
2127   if (first_idx == -1)
2128     return REG_NOERROR;
2129 
2130   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2131 
2132   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2133     {
2134       int enabled_idx;
2135       re_token_type_t type;
2136       struct re_backref_cache_entry *entry;
2137       node = candidates->elems[node_idx];
2138       type = dfa->nodes[node].type;
2139       /* Avoid infinite loop for the REs like "()\1+".  */
2140       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2141 	continue;
2142       if (type != OP_BACK_REF)
2143 	continue;
2144 
2145       entry = mctx->bkref_ents + first_idx;
2146       enabled_idx = first_idx;
2147       do
2148 	{
2149 	  int subexp_len;
2150 	  int to_idx;
2151 	  int dst_node;
2152 	  int ret;
2153 	  re_dfastate_t *cur_state;
2154 
2155 	  if (entry->node != node)
2156 	    continue;
2157 	  subexp_len = entry->subexp_to - entry->subexp_from;
2158 	  to_idx = str_idx + subexp_len;
2159 	  dst_node = (subexp_len ? dfa->nexts[node]
2160 		      : dfa->edests[node].elems[0]);
2161 
2162 	  if (to_idx > sctx->last_str_idx
2163 	      || sctx->sifted_states[to_idx] == NULL
2164 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2165 	      || check_dst_limits (mctx, &sctx->limits, node,
2166 				   str_idx, dst_node, to_idx))
2167 	    continue;
2168 
2169 	  if (local_sctx.sifted_states == NULL)
2170 	    {
2171 	      local_sctx = *sctx;
2172 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2173 	      if (BE (err != REG_NOERROR, 0))
2174 		goto free_return;
2175 	    }
2176 	  local_sctx.last_node = node;
2177 	  local_sctx.last_str_idx = str_idx;
2178 	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2179 	  if (BE (ret < 0, 0))
2180 	    {
2181 	      err = REG_ESPACE;
2182 	      goto free_return;
2183 	    }
2184 	  cur_state = local_sctx.sifted_states[str_idx];
2185 	  err = sift_states_backward (mctx, &local_sctx);
2186 	  if (BE (err != REG_NOERROR, 0))
2187 	    goto free_return;
2188 	  if (sctx->limited_states != NULL)
2189 	    {
2190 	      err = merge_state_array (dfa, sctx->limited_states,
2191 				       local_sctx.sifted_states,
2192 				       str_idx + 1);
2193 	      if (BE (err != REG_NOERROR, 0))
2194 		goto free_return;
2195 	    }
2196 	  local_sctx.sifted_states[str_idx] = cur_state;
2197 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2198 
2199 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2200           entry = mctx->bkref_ents + enabled_idx;
2201 	}
2202       while (enabled_idx++, entry++->more);
2203     }
2204   err = REG_NOERROR;
2205  free_return:
2206   if (local_sctx.sifted_states != NULL)
2207     {
2208       re_node_set_free (&local_sctx.limits);
2209     }
2210 
2211   return err;
2212 }
2213 
2214 
2215 #ifdef RE_ENABLE_I18N
2216 static int
2217 internal_function
2218 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2219 		     int node_idx, int str_idx, int max_str_idx)
2220 {
2221   const re_dfa_t *const dfa = mctx->dfa;
2222   int naccepted;
2223   /* Check the node can accept `multi byte'.  */
2224   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2225   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2226       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2227 			    dfa->nexts[node_idx]))
2228     /* The node can't accept the `multi byte', or the
2229        destination was already thrown away, then the node
2230        could't accept the current input `multi byte'.   */
2231     naccepted = 0;
2232   /* Otherwise, it is sure that the node could accept
2233      `naccepted' bytes input.  */
2234   return naccepted;
2235 }
2236 #endif /* RE_ENABLE_I18N */
2237 
2238 
2239 /* Functions for state transition.  */
2240 
2241 /* Return the next state to which the current state STATE will transit by
2242    accepting the current input byte, and update STATE_LOG if necessary.
2243    If STATE can accept a multibyte char/collating element/back reference
2244    update the destination of STATE_LOG.  */
2245 
2246 static re_dfastate_t *
2247 internal_function
2248 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2249 	       re_dfastate_t *state)
2250 {
2251   re_dfastate_t **trtable;
2252   unsigned char ch;
2253 
2254 #ifdef RE_ENABLE_I18N
2255   /* If the current state can accept multibyte.  */
2256   if (BE (state->accept_mb, 0))
2257     {
2258       *err = transit_state_mb (mctx, state);
2259       if (BE (*err != REG_NOERROR, 0))
2260 	return NULL;
2261     }
2262 #endif /* RE_ENABLE_I18N */
2263 
2264   /* Then decide the next state with the single byte.  */
2265 #if 0
2266   if (0)
2267     /* don't use transition table  */
2268     return transit_state_sb (err, mctx, state);
2269 #endif
2270 
2271   /* Use transition table  */
2272   ch = re_string_fetch_byte (&mctx->input);
2273   for (;;)
2274     {
2275       trtable = state->trtable;
2276       if (BE (trtable != NULL, 1))
2277 	return trtable[ch];
2278 
2279       trtable = state->word_trtable;
2280       if (BE (trtable != NULL, 1))
2281         {
2282 	  unsigned int context;
2283 	  context
2284 	    = re_string_context_at (&mctx->input,
2285 				    re_string_cur_idx (&mctx->input) - 1,
2286 				    mctx->eflags);
2287 	  if (IS_WORD_CONTEXT (context))
2288 	    return trtable[ch + SBC_MAX];
2289 	  else
2290 	    return trtable[ch];
2291 	}
2292 
2293       if (!build_trtable (mctx->dfa, state))
2294 	{
2295 	  *err = REG_ESPACE;
2296 	  return NULL;
2297 	}
2298 
2299       /* Retry, we now have a transition table.  */
2300     }
2301 }
2302 
2303 /* Update the state_log if we need */
2304 re_dfastate_t *
2305 internal_function
2306 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2307 		      re_dfastate_t *next_state)
2308 {
2309   const re_dfa_t *const dfa = mctx->dfa;
2310   int cur_idx = re_string_cur_idx (&mctx->input);
2311 
2312   if (cur_idx > mctx->state_log_top)
2313     {
2314       mctx->state_log[cur_idx] = next_state;
2315       mctx->state_log_top = cur_idx;
2316     }
2317   else if (mctx->state_log[cur_idx] == 0)
2318     {
2319       mctx->state_log[cur_idx] = next_state;
2320     }
2321   else
2322     {
2323       re_dfastate_t *pstate;
2324       unsigned int context;
2325       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2326       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2327          the destination of a multibyte char/collating element/
2328          back reference.  Then the next state is the union set of
2329          these destinations and the results of the transition table.  */
2330       pstate = mctx->state_log[cur_idx];
2331       log_nodes = pstate->entrance_nodes;
2332       if (next_state != NULL)
2333         {
2334           table_nodes = next_state->entrance_nodes;
2335           *err = re_node_set_init_union (&next_nodes, table_nodes,
2336 					     log_nodes);
2337           if (BE (*err != REG_NOERROR, 0))
2338 	    return NULL;
2339         }
2340       else
2341         next_nodes = *log_nodes;
2342       /* Note: We already add the nodes of the initial state,
2343 	 then we don't need to add them here.  */
2344 
2345       context = re_string_context_at (&mctx->input,
2346 				      re_string_cur_idx (&mctx->input) - 1,
2347 				      mctx->eflags);
2348       next_state = mctx->state_log[cur_idx]
2349         = re_acquire_state_context (err, dfa, &next_nodes, context);
2350       /* We don't need to check errors here, since the return value of
2351          this function is next_state and ERR is already set.  */
2352 
2353       if (table_nodes != NULL)
2354         re_node_set_free (&next_nodes);
2355     }
2356 
2357   if (BE (dfa->nbackref, 0) && next_state != NULL)
2358     {
2359       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2360 	 later.  We must check them here, since the back references in the
2361 	 next state might use them.  */
2362       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2363 					cur_idx);
2364       if (BE (*err != REG_NOERROR, 0))
2365 	return NULL;
2366 
2367       /* If the next state has back references.  */
2368       if (next_state->has_backref)
2369 	{
2370 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2371 	  if (BE (*err != REG_NOERROR, 0))
2372 	    return NULL;
2373 	  next_state = mctx->state_log[cur_idx];
2374 	}
2375     }
2376 
2377   return next_state;
2378 }
2379 
2380 /* Skip bytes in the input that correspond to part of a
2381    multi-byte match, then look in the log for a state
2382    from which to restart matching.  */
2383 re_dfastate_t *
2384 internal_function
2385 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2386 {
2387   re_dfastate_t *cur_state;
2388   do
2389     {
2390       int max = mctx->state_log_top;
2391       int cur_str_idx = re_string_cur_idx (&mctx->input);
2392 
2393       do
2394 	{
2395           if (++cur_str_idx > max)
2396             return NULL;
2397           re_string_skip_bytes (&mctx->input, 1);
2398 	}
2399       while (mctx->state_log[cur_str_idx] == NULL);
2400 
2401       cur_state = merge_state_with_log (err, mctx, NULL);
2402     }
2403   while (*err == REG_NOERROR && cur_state == NULL);
2404   return cur_state;
2405 }
2406 
2407 /* Helper functions for transit_state.  */
2408 
2409 /* From the node set CUR_NODES, pick up the nodes whose types are
2410    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2411    expression. And register them to use them later for evaluating the
2412    correspoding back references.  */
2413 
2414 static reg_errcode_t
2415 internal_function
2416 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2417 			   int str_idx)
2418 {
2419   const re_dfa_t *const dfa = mctx->dfa;
2420   int node_idx;
2421   reg_errcode_t err;
2422 
2423   /* TODO: This isn't efficient.
2424 	   Because there might be more than one nodes whose types are
2425 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2426 	   nodes.
2427 	   E.g. RE: (a){2}  */
2428   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2429     {
2430       int node = cur_nodes->elems[node_idx];
2431       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2432 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2433 	  && (dfa->used_bkref_map
2434 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2435 	{
2436 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2437 	  if (BE (err != REG_NOERROR, 0))
2438 	    return err;
2439 	}
2440     }
2441   return REG_NOERROR;
2442 }
2443 
2444 #if 0
2445 /* Return the next state to which the current state STATE will transit by
2446    accepting the current input byte.  */
2447 
2448 static re_dfastate_t *
2449 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2450 		  re_dfastate_t *state)
2451 {
2452   const re_dfa_t *const dfa = mctx->dfa;
2453   re_node_set next_nodes;
2454   re_dfastate_t *next_state;
2455   int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2456   unsigned int context;
2457 
2458   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2459   if (BE (*err != REG_NOERROR, 0))
2460     return NULL;
2461   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2462     {
2463       int cur_node = state->nodes.elems[node_cnt];
2464       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2465 	{
2466 	  *err = re_node_set_merge (&next_nodes,
2467 				    dfa->eclosures + dfa->nexts[cur_node]);
2468 	  if (BE (*err != REG_NOERROR, 0))
2469 	    {
2470 	      re_node_set_free (&next_nodes);
2471 	      return NULL;
2472 	    }
2473 	}
2474     }
2475   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2476   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2477   /* We don't need to check errors here, since the return value of
2478      this function is next_state and ERR is already set.  */
2479 
2480   re_node_set_free (&next_nodes);
2481   re_string_skip_bytes (&mctx->input, 1);
2482   return next_state;
2483 }
2484 #endif
2485 
2486 #ifdef RE_ENABLE_I18N
2487 static reg_errcode_t
2488 internal_function
2489 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2490 {
2491   const re_dfa_t *const dfa = mctx->dfa;
2492   reg_errcode_t err;
2493   int i;
2494 
2495   for (i = 0; i < pstate->nodes.nelem; ++i)
2496     {
2497       re_node_set dest_nodes, *new_nodes;
2498       int cur_node_idx = pstate->nodes.elems[i];
2499       int naccepted, dest_idx;
2500       unsigned int context;
2501       re_dfastate_t *dest_state;
2502 
2503       if (!dfa->nodes[cur_node_idx].accept_mb)
2504         continue;
2505 
2506       if (dfa->nodes[cur_node_idx].constraint)
2507 	{
2508 	  context = re_string_context_at (&mctx->input,
2509 					  re_string_cur_idx (&mctx->input),
2510 					  mctx->eflags);
2511 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2512 					   context))
2513 	    continue;
2514 	}
2515 
2516       /* How many bytes the node can accept?  */
2517       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2518 					   re_string_cur_idx (&mctx->input));
2519       if (naccepted == 0)
2520 	continue;
2521 
2522       /* The node can accepts `naccepted' bytes.  */
2523       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2524       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2525 			       : mctx->max_mb_elem_len);
2526       err = clean_state_log_if_needed (mctx, dest_idx);
2527       if (BE (err != REG_NOERROR, 0))
2528 	return err;
2529 #ifdef DEBUG
2530       assert (dfa->nexts[cur_node_idx] != -1);
2531 #endif
2532       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2533 
2534       dest_state = mctx->state_log[dest_idx];
2535       if (dest_state == NULL)
2536 	dest_nodes = *new_nodes;
2537       else
2538 	{
2539 	  err = re_node_set_init_union (&dest_nodes,
2540 					dest_state->entrance_nodes, new_nodes);
2541 	  if (BE (err != REG_NOERROR, 0))
2542 	    return err;
2543 	}
2544       context = re_string_context_at (&mctx->input, dest_idx - 1,
2545 				      mctx->eflags);
2546       mctx->state_log[dest_idx]
2547 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2548       if (dest_state != NULL)
2549 	re_node_set_free (&dest_nodes);
2550       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2551 	return err;
2552     }
2553   return REG_NOERROR;
2554 }
2555 #endif /* RE_ENABLE_I18N */
2556 
2557 static reg_errcode_t
2558 internal_function
2559 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2560 {
2561   const re_dfa_t *const dfa = mctx->dfa;
2562   reg_errcode_t err;
2563   int i;
2564   int cur_str_idx = re_string_cur_idx (&mctx->input);
2565 
2566   for (i = 0; i < nodes->nelem; ++i)
2567     {
2568       int dest_str_idx, prev_nelem, bkc_idx;
2569       int node_idx = nodes->elems[i];
2570       unsigned int context;
2571       const re_token_t *node = dfa->nodes + node_idx;
2572       re_node_set *new_dest_nodes;
2573 
2574       /* Check whether `node' is a backreference or not.  */
2575       if (node->type != OP_BACK_REF)
2576 	continue;
2577 
2578       if (node->constraint)
2579 	{
2580 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2581 					  mctx->eflags);
2582 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2583 	    continue;
2584 	}
2585 
2586       /* `node' is a backreference.
2587 	 Check the substring which the substring matched.  */
2588       bkc_idx = mctx->nbkref_ents;
2589       err = get_subexp (mctx, node_idx, cur_str_idx);
2590       if (BE (err != REG_NOERROR, 0))
2591 	goto free_return;
2592 
2593       /* And add the epsilon closures (which is `new_dest_nodes') of
2594 	 the backreference to appropriate state_log.  */
2595 #ifdef DEBUG
2596       assert (dfa->nexts[node_idx] != -1);
2597 #endif
2598       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2599 	{
2600 	  int subexp_len;
2601 	  re_dfastate_t *dest_state;
2602 	  struct re_backref_cache_entry *bkref_ent;
2603 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2604 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2605 	    continue;
2606 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2607 	  new_dest_nodes = (subexp_len == 0
2608 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2609 			    : dfa->eclosures + dfa->nexts[node_idx]);
2610 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2611 			  - bkref_ent->subexp_from);
2612 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2613 					  mctx->eflags);
2614 	  dest_state = mctx->state_log[dest_str_idx];
2615 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2616 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2617 	  /* Add `new_dest_node' to state_log.  */
2618 	  if (dest_state == NULL)
2619 	    {
2620 	      mctx->state_log[dest_str_idx]
2621 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2622 					    context);
2623 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2624 		      && err != REG_NOERROR, 0))
2625 		goto free_return;
2626 	    }
2627 	  else
2628 	    {
2629 	      re_node_set dest_nodes;
2630 	      err = re_node_set_init_union (&dest_nodes,
2631 					    dest_state->entrance_nodes,
2632 					    new_dest_nodes);
2633 	      if (BE (err != REG_NOERROR, 0))
2634 		{
2635 		  re_node_set_free (&dest_nodes);
2636 		  goto free_return;
2637 		}
2638 	      mctx->state_log[dest_str_idx]
2639 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2640 	      re_node_set_free (&dest_nodes);
2641 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2642 		      && err != REG_NOERROR, 0))
2643 		goto free_return;
2644 	    }
2645 	  /* We need to check recursively if the backreference can epsilon
2646 	     transit.  */
2647 	  if (subexp_len == 0
2648 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2649 	    {
2650 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2651 					       cur_str_idx);
2652 	      if (BE (err != REG_NOERROR, 0))
2653 		goto free_return;
2654 	      err = transit_state_bkref (mctx, new_dest_nodes);
2655 	      if (BE (err != REG_NOERROR, 0))
2656 		goto free_return;
2657 	    }
2658 	}
2659     }
2660   err = REG_NOERROR;
2661  free_return:
2662   return err;
2663 }
2664 
2665 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2666    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2667    Note that we might collect inappropriate candidates here.
2668    However, the cost of checking them strictly here is too high, then we
2669    delay these checking for prune_impossible_nodes().  */
2670 
2671 static reg_errcode_t
2672 internal_function
2673 get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2674 {
2675   const re_dfa_t *const dfa = mctx->dfa;
2676   int subexp_num, sub_top_idx;
2677   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2678   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2679   int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2680   if (cache_idx != -1)
2681     {
2682       const struct re_backref_cache_entry *entry
2683 	= mctx->bkref_ents + cache_idx;
2684       do
2685         if (entry->node == bkref_node)
2686 	  return REG_NOERROR; /* We already checked it.  */
2687       while (entry++->more);
2688     }
2689 
2690   subexp_num = dfa->nodes[bkref_node].opr.idx;
2691 
2692   /* For each sub expression  */
2693   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2694     {
2695       reg_errcode_t err;
2696       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2697       re_sub_match_last_t *sub_last;
2698       int sub_last_idx, sl_str, bkref_str_off;
2699 
2700       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2701 	continue; /* It isn't related.  */
2702 
2703       sl_str = sub_top->str_idx;
2704       bkref_str_off = bkref_str_idx;
2705       /* At first, check the last node of sub expressions we already
2706 	 evaluated.  */
2707       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2708 	{
2709 	  int sl_str_diff;
2710 	  sub_last = sub_top->lasts[sub_last_idx];
2711 	  sl_str_diff = sub_last->str_idx - sl_str;
2712 	  /* The matched string by the sub expression match with the substring
2713 	     at the back reference?  */
2714 	  if (sl_str_diff > 0)
2715 	    {
2716 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2717 		{
2718 		  /* Not enough chars for a successful match.  */
2719 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2720 		    break;
2721 
2722 		  err = clean_state_log_if_needed (mctx,
2723 						   bkref_str_off
2724 						   + sl_str_diff);
2725 		  if (BE (err != REG_NOERROR, 0))
2726 		    return err;
2727 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2728 		}
2729 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2730 		/* We don't need to search this sub expression any more.  */
2731 		break;
2732 	    }
2733 	  bkref_str_off += sl_str_diff;
2734 	  sl_str += sl_str_diff;
2735 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2736 				bkref_str_idx);
2737 
2738 	  /* Reload buf, since the preceding call might have reallocated
2739 	     the buffer.  */
2740 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2741 
2742 	  if (err == REG_NOMATCH)
2743 	    continue;
2744 	  if (BE (err != REG_NOERROR, 0))
2745 	    return err;
2746 	}
2747 
2748       if (sub_last_idx < sub_top->nlasts)
2749 	continue;
2750       if (sub_last_idx > 0)
2751 	++sl_str;
2752       /* Then, search for the other last nodes of the sub expression.  */
2753       for (; sl_str <= bkref_str_idx; ++sl_str)
2754 	{
2755 	  int cls_node, sl_str_off;
2756 	  const re_node_set *nodes;
2757 	  sl_str_off = sl_str - sub_top->str_idx;
2758 	  /* The matched string by the sub expression match with the substring
2759 	     at the back reference?  */
2760 	  if (sl_str_off > 0)
2761 	    {
2762 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2763 		{
2764 		  /* If we are at the end of the input, we cannot match.  */
2765 		  if (bkref_str_off >= mctx->input.len)
2766 		    break;
2767 
2768 		  err = extend_buffers (mctx);
2769 		  if (BE (err != REG_NOERROR, 0))
2770 		    return err;
2771 
2772 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2773 		}
2774 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2775 		break; /* We don't need to search this sub expression
2776 			  any more.  */
2777 	    }
2778 	  if (mctx->state_log[sl_str] == NULL)
2779 	    continue;
2780 	  /* Does this state have a ')' of the sub expression?  */
2781 	  nodes = &mctx->state_log[sl_str]->nodes;
2782 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2783 				       OP_CLOSE_SUBEXP);
2784 	  if (cls_node == -1)
2785 	    continue; /* No.  */
2786 	  if (sub_top->path == NULL)
2787 	    {
2788 	      sub_top->path = calloc (sizeof (state_array_t),
2789 				      sl_str - sub_top->str_idx + 1);
2790 	      if (sub_top->path == NULL)
2791 		return REG_ESPACE;
2792 	    }
2793 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2794 	     in the current context?  */
2795 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2796 			       sub_top->str_idx, cls_node, sl_str,
2797 			       OP_CLOSE_SUBEXP);
2798 	  if (err == REG_NOMATCH)
2799 	      continue;
2800 	  if (BE (err != REG_NOERROR, 0))
2801 	      return err;
2802 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2803 	  if (BE (sub_last == NULL, 0))
2804 	    return REG_ESPACE;
2805 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2806 				bkref_str_idx);
2807 	  if (err == REG_NOMATCH)
2808 	    continue;
2809 	}
2810     }
2811   return REG_NOERROR;
2812 }
2813 
2814 /* Helper functions for get_subexp().  */
2815 
2816 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2817    If it can arrive, register the sub expression expressed with SUB_TOP
2818    and SUB_LAST.  */
2819 
2820 static reg_errcode_t
2821 internal_function
2822 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2823 		re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2824 {
2825   reg_errcode_t err;
2826   int to_idx;
2827   /* Can the subexpression arrive the back reference?  */
2828   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2829 		       sub_last->str_idx, bkref_node, bkref_str,
2830 		       OP_OPEN_SUBEXP);
2831   if (err != REG_NOERROR)
2832     return err;
2833   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2834 			     sub_last->str_idx);
2835   if (BE (err != REG_NOERROR, 0))
2836     return err;
2837   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2838   return clean_state_log_if_needed (mctx, to_idx);
2839 }
2840 
2841 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2842    Search '(' if FL_OPEN, or search ')' otherwise.
2843    TODO: This function isn't efficient...
2844 	 Because there might be more than one nodes whose types are
2845 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2846 	 nodes.
2847 	 E.g. RE: (a){2}  */
2848 
2849 static int
2850 internal_function
2851 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2852 		  int subexp_idx, int type)
2853 {
2854   int cls_idx;
2855   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2856     {
2857       int cls_node = nodes->elems[cls_idx];
2858       const re_token_t *node = dfa->nodes + cls_node;
2859       if (node->type == type
2860 	  && node->opr.idx == subexp_idx)
2861 	return cls_node;
2862     }
2863   return -1;
2864 }
2865 
2866 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2867    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2868    heavily reused.
2869    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2870 
2871 static reg_errcode_t
2872 internal_function
2873 check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2874 	       int top_str, int last_node, int last_str, int type)
2875 {
2876   const re_dfa_t *const dfa = mctx->dfa;
2877   reg_errcode_t err = REG_NOERROR;
2878   int subexp_num, backup_cur_idx, str_idx, null_cnt;
2879   re_dfastate_t *cur_state = NULL;
2880   re_node_set *cur_nodes, next_nodes;
2881   re_dfastate_t **backup_state_log;
2882   unsigned int context;
2883 
2884   subexp_num = dfa->nodes[top_node].opr.idx;
2885   /* Extend the buffer if we need.  */
2886   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2887     {
2888       re_dfastate_t **new_array;
2889       int old_alloc = path->alloc;
2890       path->alloc += last_str + mctx->max_mb_elem_len + 1;
2891       new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2892       if (BE (new_array == NULL, 0))
2893 	{
2894 	  path->alloc = old_alloc;
2895 	  return REG_ESPACE;
2896 	}
2897       path->array = new_array;
2898       memset (new_array + old_alloc, '\0',
2899 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2900     }
2901 
2902   str_idx = path->next_idx ?: top_str;
2903 
2904   /* Temporary modify MCTX.  */
2905   backup_state_log = mctx->state_log;
2906   backup_cur_idx = mctx->input.cur_idx;
2907   mctx->state_log = path->array;
2908   mctx->input.cur_idx = str_idx;
2909 
2910   /* Setup initial node set.  */
2911   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2912   if (str_idx == top_str)
2913     {
2914       err = re_node_set_init_1 (&next_nodes, top_node);
2915       if (BE (err != REG_NOERROR, 0))
2916 	return err;
2917       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2918       if (BE (err != REG_NOERROR, 0))
2919 	{
2920 	  re_node_set_free (&next_nodes);
2921 	  return err;
2922 	}
2923     }
2924   else
2925     {
2926       cur_state = mctx->state_log[str_idx];
2927       if (cur_state && cur_state->has_backref)
2928 	{
2929 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2930 	  if (BE (err != REG_NOERROR, 0))
2931 	    return err;
2932 	}
2933       else
2934 	re_node_set_init_empty (&next_nodes);
2935     }
2936   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2937     {
2938       if (next_nodes.nelem)
2939 	{
2940 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2941 				    subexp_num, type);
2942 	  if (BE (err != REG_NOERROR, 0))
2943 	    {
2944 	      re_node_set_free (&next_nodes);
2945 	      return err;
2946 	    }
2947 	}
2948       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2949       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2950 	{
2951 	  re_node_set_free (&next_nodes);
2952 	  return err;
2953 	}
2954       mctx->state_log[str_idx] = cur_state;
2955     }
2956 
2957   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2958     {
2959       re_node_set_empty (&next_nodes);
2960       if (mctx->state_log[str_idx + 1])
2961 	{
2962 	  err = re_node_set_merge (&next_nodes,
2963 				   &mctx->state_log[str_idx + 1]->nodes);
2964 	  if (BE (err != REG_NOERROR, 0))
2965 	    {
2966 	      re_node_set_free (&next_nodes);
2967 	      return err;
2968 	    }
2969 	}
2970       if (cur_state)
2971 	{
2972 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2973 					      &cur_state->non_eps_nodes,
2974 					      &next_nodes);
2975 	  if (BE (err != REG_NOERROR, 0))
2976 	    {
2977 	      re_node_set_free (&next_nodes);
2978 	      return err;
2979 	    }
2980 	}
2981       ++str_idx;
2982       if (next_nodes.nelem)
2983 	{
2984 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2985 	  if (BE (err != REG_NOERROR, 0))
2986 	    {
2987 	      re_node_set_free (&next_nodes);
2988 	      return err;
2989 	    }
2990 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2991 				    subexp_num, type);
2992 	  if (BE (err != REG_NOERROR, 0))
2993 	    {
2994 	      re_node_set_free (&next_nodes);
2995 	      return err;
2996 	    }
2997 	}
2998       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2999       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3000       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3001 	{
3002 	  re_node_set_free (&next_nodes);
3003 	  return err;
3004 	}
3005       mctx->state_log[str_idx] = cur_state;
3006       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3007     }
3008   re_node_set_free (&next_nodes);
3009   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3010 	       : &mctx->state_log[last_str]->nodes);
3011   path->next_idx = str_idx;
3012 
3013   /* Fix MCTX.  */
3014   mctx->state_log = backup_state_log;
3015   mctx->input.cur_idx = backup_cur_idx;
3016 
3017   /* Then check the current node set has the node LAST_NODE.  */
3018   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3019     return REG_NOERROR;
3020 
3021   return REG_NOMATCH;
3022 }
3023 
3024 /* Helper functions for check_arrival.  */
3025 
3026 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3027    to NEXT_NODES.
3028    TODO: This function is similar to the functions transit_state*(),
3029 	 however this function has many additional works.
3030 	 Can't we unify them?  */
3031 
3032 static reg_errcode_t
3033 internal_function
3034 check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3035 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3036 {
3037   const re_dfa_t *const dfa = mctx->dfa;
3038   int result;
3039   int cur_idx;
3040   reg_errcode_t err = REG_NOERROR;
3041   re_node_set union_set;
3042   re_node_set_init_empty (&union_set);
3043   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3044     {
3045       int naccepted = 0;
3046       int cur_node = cur_nodes->elems[cur_idx];
3047 #ifdef DEBUG
3048       re_token_type_t type = dfa->nodes[cur_node].type;
3049       assert (!IS_EPSILON_NODE (type));
3050 #endif
3051 #ifdef RE_ENABLE_I18N
3052       /* If the node may accept `multi byte'.  */
3053       if (dfa->nodes[cur_node].accept_mb)
3054 	{
3055 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3056 					       str_idx);
3057 	  if (naccepted > 1)
3058 	    {
3059 	      re_dfastate_t *dest_state;
3060 	      int next_node = dfa->nexts[cur_node];
3061 	      int next_idx = str_idx + naccepted;
3062 	      dest_state = mctx->state_log[next_idx];
3063 	      re_node_set_empty (&union_set);
3064 	      if (dest_state)
3065 		{
3066 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3067 		  if (BE (err != REG_NOERROR, 0))
3068 		    {
3069 		      re_node_set_free (&union_set);
3070 		      return err;
3071 		    }
3072 		}
3073 	      result = re_node_set_insert (&union_set, next_node);
3074 	      if (BE (result < 0, 0))
3075 		{
3076 		  re_node_set_free (&union_set);
3077 		  return REG_ESPACE;
3078 		}
3079 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3080 							    &union_set);
3081 	      if (BE (mctx->state_log[next_idx] == NULL
3082 		      && err != REG_NOERROR, 0))
3083 		{
3084 		  re_node_set_free (&union_set);
3085 		  return err;
3086 		}
3087 	    }
3088 	}
3089 #endif /* RE_ENABLE_I18N */
3090       if (naccepted
3091 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3092 	{
3093 	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3094 	  if (BE (result < 0, 0))
3095 	    {
3096 	      re_node_set_free (&union_set);
3097 	      return REG_ESPACE;
3098 	    }
3099 	}
3100     }
3101   re_node_set_free (&union_set);
3102   return REG_NOERROR;
3103 }
3104 
3105 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3106    CUR_NODES, however exclude the nodes which are:
3107     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3108     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3109 */
3110 
3111 static reg_errcode_t
3112 internal_function
3113 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3114 			  int ex_subexp, int type)
3115 {
3116   reg_errcode_t err;
3117   int idx, outside_node;
3118   re_node_set new_nodes;
3119 #ifdef DEBUG
3120   assert (cur_nodes->nelem);
3121 #endif
3122   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3123   if (BE (err != REG_NOERROR, 0))
3124     return err;
3125   /* Create a new node set NEW_NODES with the nodes which are epsilon
3126      closures of the node in CUR_NODES.  */
3127 
3128   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3129     {
3130       int cur_node = cur_nodes->elems[idx];
3131       const re_node_set *eclosure = dfa->eclosures + cur_node;
3132       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3133       if (outside_node == -1)
3134 	{
3135 	  /* There are no problematic nodes, just merge them.  */
3136 	  err = re_node_set_merge (&new_nodes, eclosure);
3137 	  if (BE (err != REG_NOERROR, 0))
3138 	    {
3139 	      re_node_set_free (&new_nodes);
3140 	      return err;
3141 	    }
3142 	}
3143       else
3144 	{
3145 	  /* There are problematic nodes, re-calculate incrementally.  */
3146 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3147 					      ex_subexp, type);
3148 	  if (BE (err != REG_NOERROR, 0))
3149 	    {
3150 	      re_node_set_free (&new_nodes);
3151 	      return err;
3152 	    }
3153 	}
3154     }
3155   re_node_set_free (cur_nodes);
3156   *cur_nodes = new_nodes;
3157   return REG_NOERROR;
3158 }
3159 
3160 /* Helper function for check_arrival_expand_ecl.
3161    Check incrementally the epsilon closure of TARGET, and if it isn't
3162    problematic append it to DST_NODES.  */
3163 
3164 static reg_errcode_t
3165 internal_function
3166 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3167 			      int target, int ex_subexp, int type)
3168 {
3169   int cur_node;
3170   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3171     {
3172       int err;
3173 
3174       if (dfa->nodes[cur_node].type == type
3175 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3176 	{
3177 	  if (type == OP_CLOSE_SUBEXP)
3178 	    {
3179 	      err = re_node_set_insert (dst_nodes, cur_node);
3180 	      if (BE (err == -1, 0))
3181 		return REG_ESPACE;
3182 	    }
3183 	  break;
3184 	}
3185       err = re_node_set_insert (dst_nodes, cur_node);
3186       if (BE (err == -1, 0))
3187 	return REG_ESPACE;
3188       if (dfa->edests[cur_node].nelem == 0)
3189 	break;
3190       if (dfa->edests[cur_node].nelem == 2)
3191 	{
3192 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3193 					      dfa->edests[cur_node].elems[1],
3194 					      ex_subexp, type);
3195 	  if (BE (err != REG_NOERROR, 0))
3196 	    return err;
3197 	}
3198       cur_node = dfa->edests[cur_node].elems[0];
3199     }
3200   return REG_NOERROR;
3201 }
3202 
3203 
3204 /* For all the back references in the current state, calculate the
3205    destination of the back references by the appropriate entry
3206    in MCTX->BKREF_ENTS.  */
3207 
3208 static reg_errcode_t
3209 internal_function
3210 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3211 		    int cur_str, int subexp_num, int type)
3212 {
3213   const re_dfa_t *const dfa = mctx->dfa;
3214   reg_errcode_t err;
3215   int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3216   struct re_backref_cache_entry *ent;
3217 
3218   if (cache_idx_start == -1)
3219     return REG_NOERROR;
3220 
3221  restart:
3222   ent = mctx->bkref_ents + cache_idx_start;
3223   do
3224     {
3225       int to_idx, next_node;
3226 
3227       /* Is this entry ENT is appropriate?  */
3228       if (!re_node_set_contains (cur_nodes, ent->node))
3229 	continue; /* No.  */
3230 
3231       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3232       /* Calculate the destination of the back reference, and append it
3233 	 to MCTX->STATE_LOG.  */
3234       if (to_idx == cur_str)
3235 	{
3236 	  /* The backreference did epsilon transit, we must re-check all the
3237 	     node in the current state.  */
3238 	  re_node_set new_dests;
3239 	  reg_errcode_t err2, err3;
3240 	  next_node = dfa->edests[ent->node].elems[0];
3241 	  if (re_node_set_contains (cur_nodes, next_node))
3242 	    continue;
3243 	  err = re_node_set_init_1 (&new_dests, next_node);
3244 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3245 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3246 	  re_node_set_free (&new_dests);
3247 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3248 		  || err3 != REG_NOERROR, 0))
3249 	    {
3250 	      err = (err != REG_NOERROR ? err
3251 		     : (err2 != REG_NOERROR ? err2 : err3));
3252 	      return err;
3253 	    }
3254 	  /* TODO: It is still inefficient...  */
3255 	  goto restart;
3256 	}
3257       else
3258 	{
3259 	  re_node_set union_set;
3260 	  next_node = dfa->nexts[ent->node];
3261 	  if (mctx->state_log[to_idx])
3262 	    {
3263 	      int ret;
3264 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3265 					next_node))
3266 		continue;
3267 	      err = re_node_set_init_copy (&union_set,
3268 					   &mctx->state_log[to_idx]->nodes);
3269 	      ret = re_node_set_insert (&union_set, next_node);
3270 	      if (BE (err != REG_NOERROR || ret < 0, 0))
3271 		{
3272 		  re_node_set_free (&union_set);
3273 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3274 		  return err;
3275 		}
3276 	    }
3277 	  else
3278 	    {
3279 	      err = re_node_set_init_1 (&union_set, next_node);
3280 	      if (BE (err != REG_NOERROR, 0))
3281 		return err;
3282 	    }
3283 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3284 	  re_node_set_free (&union_set);
3285 	  if (BE (mctx->state_log[to_idx] == NULL
3286 		  && err != REG_NOERROR, 0))
3287 	    return err;
3288 	}
3289     }
3290   while (ent++->more);
3291   return REG_NOERROR;
3292 }
3293 
3294 /* Build transition table for the state.
3295    Return 1 if succeeded, otherwise return NULL.  */
3296 
3297 static int
3298 internal_function
3299 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3300 {
3301   reg_errcode_t err;
3302   int i, j, ch, need_word_trtable = 0;
3303   bitset_word_t elem, mask;
3304   bool dests_node_malloced = false;
3305   bool dest_states_malloced = false;
3306   int ndests; /* Number of the destination states from `state'.  */
3307   re_dfastate_t **trtable;
3308   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3309   re_node_set follows, *dests_node;
3310   bitset_t *dests_ch;
3311   bitset_t acceptable;
3312 
3313   struct dests_alloc
3314   {
3315     re_node_set dests_node[SBC_MAX];
3316     bitset_t dests_ch[SBC_MAX];
3317   } *dests_alloc;
3318 
3319   /* We build DFA states which corresponds to the destination nodes
3320      from `state'.  `dests_node[i]' represents the nodes which i-th
3321      destination state contains, and `dests_ch[i]' represents the
3322      characters which i-th destination state accepts.  */
3323   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3324     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3325   else
3326     {
3327       dests_alloc = re_malloc (struct dests_alloc, 1);
3328       if (BE (dests_alloc == NULL, 0))
3329 	return 0;
3330       dests_node_malloced = true;
3331     }
3332   dests_node = dests_alloc->dests_node;
3333   dests_ch = dests_alloc->dests_ch;
3334 
3335   /* Initialize transiton table.  */
3336   state->word_trtable = state->trtable = NULL;
3337 
3338   /* At first, group all nodes belonging to `state' into several
3339      destinations.  */
3340   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3341   if (BE (ndests <= 0, 0))
3342     {
3343       if (dests_node_malloced)
3344 	free (dests_alloc);
3345       /* Return 0 in case of an error, 1 otherwise.  */
3346       if (ndests == 0)
3347 	{
3348 	  state->trtable = (re_dfastate_t **)
3349 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3350 	  return 1;
3351 	}
3352       return 0;
3353     }
3354 
3355   err = re_node_set_alloc (&follows, ndests + 1);
3356   if (BE (err != REG_NOERROR, 0))
3357     goto out_free;
3358 
3359   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3360 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3361     dest_states = (re_dfastate_t **)
3362       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3363   else
3364     {
3365       dest_states = (re_dfastate_t **)
3366 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3367       if (BE (dest_states == NULL, 0))
3368 	{
3369 out_free:
3370 	  if (dest_states_malloced)
3371 	    free (dest_states);
3372 	  re_node_set_free (&follows);
3373 	  for (i = 0; i < ndests; ++i)
3374 	    re_node_set_free (dests_node + i);
3375 	  if (dests_node_malloced)
3376 	    free (dests_alloc);
3377 	  return 0;
3378 	}
3379       dest_states_malloced = true;
3380     }
3381   dest_states_word = dest_states + ndests;
3382   dest_states_nl = dest_states_word + ndests;
3383   bitset_empty (acceptable);
3384 
3385   /* Then build the states for all destinations.  */
3386   for (i = 0; i < ndests; ++i)
3387     {
3388       int next_node;
3389       re_node_set_empty (&follows);
3390       /* Merge the follows of this destination states.  */
3391       for (j = 0; j < dests_node[i].nelem; ++j)
3392 	{
3393 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3394 	  if (next_node != -1)
3395 	    {
3396 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3397 	      if (BE (err != REG_NOERROR, 0))
3398 		goto out_free;
3399 	    }
3400 	}
3401       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3402       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3403 	goto out_free;
3404       /* If the new state has context constraint,
3405 	 build appropriate states for these contexts.  */
3406       if (dest_states[i]->has_constraint)
3407 	{
3408 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3409 							  CONTEXT_WORD);
3410 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3411 	    goto out_free;
3412 
3413 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3414 	    need_word_trtable = 1;
3415 
3416 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3417 							CONTEXT_NEWLINE);
3418 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3419 	    goto out_free;
3420  	}
3421       else
3422 	{
3423 	  dest_states_word[i] = dest_states[i];
3424 	  dest_states_nl[i] = dest_states[i];
3425 	}
3426       bitset_merge (acceptable, dests_ch[i]);
3427     }
3428 
3429   if (!BE (need_word_trtable, 0))
3430     {
3431       /* We don't care about whether the following character is a word
3432 	 character, or we are in a single-byte character set so we can
3433 	 discern by looking at the character code: allocate a
3434 	 256-entry transition table.  */
3435       trtable = state->trtable =
3436 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3437       if (BE (trtable == NULL, 0))
3438 	goto out_free;
3439 
3440       /* For all characters ch...:  */
3441       for (i = 0; i < BITSET_WORDS; ++i)
3442 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3443 	     elem;
3444 	     mask <<= 1, elem >>= 1, ++ch)
3445 	  if (BE (elem & 1, 0))
3446 	    {
3447 	      /* There must be exactly one destination which accepts
3448 		 character ch.  See group_nodes_into_DFAstates.  */
3449 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3450 		;
3451 
3452 	      /* j-th destination accepts the word character ch.  */
3453 	      if (dfa->word_char[i] & mask)
3454 		trtable[ch] = dest_states_word[j];
3455 	      else
3456 		trtable[ch] = dest_states[j];
3457 	    }
3458     }
3459   else
3460     {
3461       /* We care about whether the following character is a word
3462 	 character, and we are in a multi-byte character set: discern
3463 	 by looking at the character code: build two 256-entry
3464 	 transition tables, one starting at trtable[0] and one
3465 	 starting at trtable[SBC_MAX].  */
3466       trtable = state->word_trtable =
3467 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3468       if (BE (trtable == NULL, 0))
3469 	goto out_free;
3470 
3471       /* For all characters ch...:  */
3472       for (i = 0; i < BITSET_WORDS; ++i)
3473 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3474 	     elem;
3475 	     mask <<= 1, elem >>= 1, ++ch)
3476 	  if (BE (elem & 1, 0))
3477 	    {
3478 	      /* There must be exactly one destination which accepts
3479 		 character ch.  See group_nodes_into_DFAstates.  */
3480 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3481 		;
3482 
3483 	      /* j-th destination accepts the word character ch.  */
3484 	      trtable[ch] = dest_states[j];
3485 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3486 	    }
3487     }
3488 
3489   /* new line */
3490   if (bitset_contain (acceptable, NEWLINE_CHAR))
3491     {
3492       /* The current state accepts newline character.  */
3493       for (j = 0; j < ndests; ++j)
3494 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3495 	  {
3496 	    /* k-th destination accepts newline character.  */
3497 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3498 	    if (need_word_trtable)
3499 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3500 	    /* There must be only one destination which accepts
3501 	       newline.  See group_nodes_into_DFAstates.  */
3502 	    break;
3503 	  }
3504     }
3505 
3506   if (dest_states_malloced)
3507     free (dest_states);
3508 
3509   re_node_set_free (&follows);
3510   for (i = 0; i < ndests; ++i)
3511     re_node_set_free (dests_node + i);
3512 
3513   if (dests_node_malloced)
3514     free (dests_alloc);
3515 
3516   return 1;
3517 }
3518 
3519 /* Group all nodes belonging to STATE into several destinations.
3520    Then for all destinations, set the nodes belonging to the destination
3521    to DESTS_NODE[i] and set the characters accepted by the destination
3522    to DEST_CH[i].  This function return the number of destinations.  */
3523 
3524 static int
3525 internal_function
3526 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3527 			    re_node_set *dests_node, bitset_t *dests_ch)
3528 {
3529   reg_errcode_t err;
3530   int result;
3531   int i, j, k;
3532   int ndests; /* Number of the destinations from `state'.  */
3533   bitset_t accepts; /* Characters a node can accept.  */
3534   const re_node_set *cur_nodes = &state->nodes;
3535   bitset_empty (accepts);
3536   ndests = 0;
3537 
3538   /* For all the nodes belonging to `state',  */
3539   for (i = 0; i < cur_nodes->nelem; ++i)
3540     {
3541       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3542       re_token_type_t type = node->type;
3543       unsigned int constraint = node->constraint;
3544 
3545       /* Enumerate all single byte character this node can accept.  */
3546       if (type == CHARACTER)
3547 	bitset_set (accepts, node->opr.c);
3548       else if (type == SIMPLE_BRACKET)
3549 	{
3550 	  bitset_merge (accepts, node->opr.sbcset);
3551 	}
3552       else if (type == OP_PERIOD)
3553 	{
3554 #ifdef RE_ENABLE_I18N
3555 	  if (dfa->mb_cur_max > 1)
3556 	    bitset_merge (accepts, dfa->sb_char);
3557 	  else
3558 #endif
3559 	    bitset_set_all (accepts);
3560 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3561 	    bitset_clear (accepts, '\n');
3562 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3563 	    bitset_clear (accepts, '\0');
3564 	}
3565 #ifdef RE_ENABLE_I18N
3566       else if (type == OP_UTF8_PERIOD)
3567         {
3568 	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
3569 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3570 	    bitset_clear (accepts, '\n');
3571 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3572 	    bitset_clear (accepts, '\0');
3573         }
3574 #endif
3575       else
3576 	continue;
3577 
3578       /* Check the `accepts' and sift the characters which are not
3579 	 match it the context.  */
3580       if (constraint)
3581 	{
3582 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3583 	    {
3584 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3585 	      bitset_empty (accepts);
3586 	      if (accepts_newline)
3587 		bitset_set (accepts, NEWLINE_CHAR);
3588 	      else
3589 		continue;
3590 	    }
3591 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3592 	    {
3593 	      bitset_empty (accepts);
3594 	      continue;
3595 	    }
3596 
3597 	  if (constraint & NEXT_WORD_CONSTRAINT)
3598 	    {
3599 	      bitset_word_t any_set = 0;
3600 	      if (type == CHARACTER && !node->word_char)
3601 		{
3602 		  bitset_empty (accepts);
3603 		  continue;
3604 		}
3605 #ifdef RE_ENABLE_I18N
3606 	      if (dfa->mb_cur_max > 1)
3607 		for (j = 0; j < BITSET_WORDS; ++j)
3608 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3609 	      else
3610 #endif
3611 		for (j = 0; j < BITSET_WORDS; ++j)
3612 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3613 	      if (!any_set)
3614 		continue;
3615 	    }
3616 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3617 	    {
3618 	      bitset_word_t any_set = 0;
3619 	      if (type == CHARACTER && node->word_char)
3620 		{
3621 		  bitset_empty (accepts);
3622 		  continue;
3623 		}
3624 #ifdef RE_ENABLE_I18N
3625 	      if (dfa->mb_cur_max > 1)
3626 		for (j = 0; j < BITSET_WORDS; ++j)
3627 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3628 	      else
3629 #endif
3630 		for (j = 0; j < BITSET_WORDS; ++j)
3631 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3632 	      if (!any_set)
3633 		continue;
3634 	    }
3635 	}
3636 
3637       /* Then divide `accepts' into DFA states, or create a new
3638 	 state.  Above, we make sure that accepts is not empty.  */
3639       for (j = 0; j < ndests; ++j)
3640 	{
3641 	  bitset_t intersec; /* Intersection sets, see below.  */
3642 	  bitset_t remains;
3643 	  /* Flags, see below.  */
3644 	  bitset_word_t has_intersec, not_subset, not_consumed;
3645 
3646 	  /* Optimization, skip if this state doesn't accept the character.  */
3647 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3648 	    continue;
3649 
3650 	  /* Enumerate the intersection set of this state and `accepts'.  */
3651 	  has_intersec = 0;
3652 	  for (k = 0; k < BITSET_WORDS; ++k)
3653 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3654 	  /* And skip if the intersection set is empty.  */
3655 	  if (!has_intersec)
3656 	    continue;
3657 
3658 	  /* Then check if this state is a subset of `accepts'.  */
3659 	  not_subset = not_consumed = 0;
3660 	  for (k = 0; k < BITSET_WORDS; ++k)
3661 	    {
3662 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3663 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3664 	    }
3665 
3666 	  /* If this state isn't a subset of `accepts', create a
3667 	     new group state, which has the `remains'. */
3668 	  if (not_subset)
3669 	    {
3670 	      bitset_copy (dests_ch[ndests], remains);
3671 	      bitset_copy (dests_ch[j], intersec);
3672 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3673 	      if (BE (err != REG_NOERROR, 0))
3674 		goto error_return;
3675 	      ++ndests;
3676 	    }
3677 
3678 	  /* Put the position in the current group. */
3679 	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3680 	  if (BE (result < 0, 0))
3681 	    goto error_return;
3682 
3683 	  /* If all characters are consumed, go to next node. */
3684 	  if (!not_consumed)
3685 	    break;
3686 	}
3687       /* Some characters remain, create a new group. */
3688       if (j == ndests)
3689 	{
3690 	  bitset_copy (dests_ch[ndests], accepts);
3691 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3692 	  if (BE (err != REG_NOERROR, 0))
3693 	    goto error_return;
3694 	  ++ndests;
3695 	  bitset_empty (accepts);
3696 	}
3697     }
3698   return ndests;
3699  error_return:
3700   for (j = 0; j < ndests; ++j)
3701     re_node_set_free (dests_node + j);
3702   return -1;
3703 }
3704 
3705 #ifdef RE_ENABLE_I18N
3706 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3707    Return the number of the bytes the node accepts.
3708    STR_IDX is the current index of the input string.
3709 
3710    This function handles the nodes which can accept one character, or
3711    one collating element like '.', '[a-z]', opposite to the other nodes
3712    can only accept one byte.  */
3713 
3714 static int
3715 internal_function
3716 check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3717 			 const re_string_t *input, int str_idx)
3718 {
3719   const re_token_t *node = dfa->nodes + node_idx;
3720   int char_len, elem_len;
3721   int i;
3722 
3723   if (BE (node->type == OP_UTF8_PERIOD, 0))
3724     {
3725       unsigned char c = re_string_byte_at (input, str_idx), d;
3726       if (BE (c < 0xc2, 1))
3727 	return 0;
3728 
3729       if (str_idx + 2 > input->len)
3730 	return 0;
3731 
3732       d = re_string_byte_at (input, str_idx + 1);
3733       if (c < 0xe0)
3734 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3735       else if (c < 0xf0)
3736 	{
3737 	  char_len = 3;
3738 	  if (c == 0xe0 && d < 0xa0)
3739 	    return 0;
3740 	}
3741       else if (c < 0xf8)
3742 	{
3743 	  char_len = 4;
3744 	  if (c == 0xf0 && d < 0x90)
3745 	    return 0;
3746 	}
3747       else if (c < 0xfc)
3748 	{
3749 	  char_len = 5;
3750 	  if (c == 0xf8 && d < 0x88)
3751 	    return 0;
3752 	}
3753       else if (c < 0xfe)
3754 	{
3755 	  char_len = 6;
3756 	  if (c == 0xfc && d < 0x84)
3757 	    return 0;
3758 	}
3759       else
3760 	return 0;
3761 
3762       if (str_idx + char_len > input->len)
3763 	return 0;
3764 
3765       for (i = 1; i < char_len; ++i)
3766 	{
3767 	  d = re_string_byte_at (input, str_idx + i);
3768 	  if (d < 0x80 || d > 0xbf)
3769 	    return 0;
3770 	}
3771       return char_len;
3772     }
3773 
3774   char_len = re_string_char_size_at (input, str_idx);
3775   if (node->type == OP_PERIOD)
3776     {
3777       if (char_len <= 1)
3778         return 0;
3779       /* FIXME: I don't think this if is needed, as both '\n'
3780 	 and '\0' are char_len == 1.  */
3781       /* '.' accepts any one character except the following two cases.  */
3782       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3783 	   re_string_byte_at (input, str_idx) == '\n') ||
3784 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3785 	   re_string_byte_at (input, str_idx) == '\0'))
3786 	return 0;
3787       return char_len;
3788     }
3789 
3790   elem_len = re_string_elem_size_at (input, str_idx);
3791   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3792     return 0;
3793 
3794   if (node->type == COMPLEX_BRACKET)
3795     {
3796       const re_charset_t *cset = node->opr.mbcset;
3797 # ifdef _LIBC
3798       const unsigned char *pin
3799 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3800       int j;
3801       uint32_t nrules;
3802 # endif /* _LIBC */
3803       int match_len = 0;
3804       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3805 		    ? re_string_wchar_at (input, str_idx) : 0);
3806 
3807       /* match with multibyte character?  */
3808       for (i = 0; i < cset->nmbchars; ++i)
3809 	if (wc == cset->mbchars[i])
3810 	  {
3811 	    match_len = char_len;
3812 	    goto check_node_accept_bytes_match;
3813 	  }
3814       /* match with character_class?  */
3815       for (i = 0; i < cset->nchar_classes; ++i)
3816 	{
3817 	  wctype_t wt = cset->char_classes[i];
3818 	  if (__iswctype (wc, wt))
3819 	    {
3820 	      match_len = char_len;
3821 	      goto check_node_accept_bytes_match;
3822 	    }
3823 	}
3824 
3825 # ifdef _LIBC
3826       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3827       if (nrules != 0)
3828 	{
3829 	  unsigned int in_collseq = 0;
3830 	  const int32_t *table, *indirect;
3831 	  const unsigned char *weights, *extra;
3832 	  const char *collseqwc;
3833 	  /* This #include defines a local function!  */
3834 #  include <locale/weight.h>
3835 
3836 	  /* match with collating_symbol?  */
3837 	  if (cset->ncoll_syms)
3838 	    extra = (const unsigned char *)
3839 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3840 	  for (i = 0; i < cset->ncoll_syms; ++i)
3841 	    {
3842 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3843 	      /* Compare the length of input collating element and
3844 		 the length of current collating element.  */
3845 	      if (*coll_sym != elem_len)
3846 		continue;
3847 	      /* Compare each bytes.  */
3848 	      for (j = 0; j < *coll_sym; j++)
3849 		if (pin[j] != coll_sym[1 + j])
3850 		  break;
3851 	      if (j == *coll_sym)
3852 		{
3853 		  /* Match if every bytes is equal.  */
3854 		  match_len = j;
3855 		  goto check_node_accept_bytes_match;
3856 		}
3857 	    }
3858 
3859 	  if (cset->nranges)
3860 	    {
3861 	      if (elem_len <= char_len)
3862 		{
3863 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3864 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3865 		}
3866 	      else
3867 		in_collseq = find_collation_sequence_value (pin, elem_len);
3868 	    }
3869 	  /* match with range expression?  */
3870 	  for (i = 0; i < cset->nranges; ++i)
3871 	    if (cset->range_starts[i] <= in_collseq
3872 		&& in_collseq <= cset->range_ends[i])
3873 	      {
3874 		match_len = elem_len;
3875 		goto check_node_accept_bytes_match;
3876 	      }
3877 
3878 	  /* match with equivalence_class?  */
3879 	  if (cset->nequiv_classes)
3880 	    {
3881 	      const unsigned char *cp = pin;
3882 	      table = (const int32_t *)
3883 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3884 	      weights = (const unsigned char *)
3885 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3886 	      extra = (const unsigned char *)
3887 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3888 	      indirect = (const int32_t *)
3889 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890 	      int32_t idx = findidx (&cp);
3891 	      if (idx > 0)
3892 		for (i = 0; i < cset->nequiv_classes; ++i)
3893 		  {
3894 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3895 		    size_t weight_len = weights[idx & 0xffffff];
3896 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3897 			&& (idx >> 24) == (equiv_class_idx >> 24))
3898 		      {
3899 			int cnt = 0;
3900 
3901 			idx &= 0xffffff;
3902 			equiv_class_idx &= 0xffffff;
3903 
3904 			while (cnt <= weight_len
3905 			       && (weights[equiv_class_idx + 1 + cnt]
3906 				   == weights[idx + 1 + cnt]))
3907 			  ++cnt;
3908 			if (cnt > weight_len)
3909 			  {
3910 			    match_len = elem_len;
3911 			    goto check_node_accept_bytes_match;
3912 			  }
3913 		      }
3914 		  }
3915 	    }
3916 	}
3917       else
3918 # endif /* _LIBC */
3919 	{
3920 	  /* match with range expression?  */
3921 #if __GNUC__ >= 2
3922 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3923 #else
3924 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3925 	  cmp_buf[2] = wc;
3926 #endif
3927 	  for (i = 0; i < cset->nranges; ++i)
3928 	    {
3929 	      cmp_buf[0] = cset->range_starts[i];
3930 	      cmp_buf[4] = cset->range_ends[i];
3931 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3932 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3933 		{
3934 		  match_len = char_len;
3935 		  goto check_node_accept_bytes_match;
3936 		}
3937 	    }
3938 	}
3939     check_node_accept_bytes_match:
3940       if (!cset->non_match)
3941 	return match_len;
3942       else
3943 	{
3944 	  if (match_len > 0)
3945 	    return 0;
3946 	  else
3947 	    return (elem_len > char_len) ? elem_len : char_len;
3948 	}
3949     }
3950   return 0;
3951 }
3952 
3953 # ifdef _LIBC
3954 static unsigned int
3955 internal_function
3956 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3957 {
3958   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3959   if (nrules == 0)
3960     {
3961       if (mbs_len == 1)
3962 	{
3963 	  /* No valid character.  Match it as a single byte character.  */
3964 	  const unsigned char *collseq = (const unsigned char *)
3965 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3966 	  return collseq[mbs[0]];
3967 	}
3968       return UINT_MAX;
3969     }
3970   else
3971     {
3972       int32_t idx;
3973       const unsigned char *extra = (const unsigned char *)
3974 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3975       int32_t extrasize = (const unsigned char *)
3976 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3977 
3978       for (idx = 0; idx < extrasize;)
3979 	{
3980 	  int mbs_cnt, found = 0;
3981 	  int32_t elem_mbs_len;
3982 	  /* Skip the name of collating element name.  */
3983 	  idx = idx + extra[idx] + 1;
3984 	  elem_mbs_len = extra[idx++];
3985 	  if (mbs_len == elem_mbs_len)
3986 	    {
3987 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3988 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3989 		  break;
3990 	      if (mbs_cnt == elem_mbs_len)
3991 		/* Found the entry.  */
3992 		found = 1;
3993 	    }
3994 	  /* Skip the byte sequence of the collating element.  */
3995 	  idx += elem_mbs_len;
3996 	  /* Adjust for the alignment.  */
3997 	  idx = (idx + 3) & ~3;
3998 	  /* Skip the collation sequence value.  */
3999 	  idx += sizeof (uint32_t);
4000 	  /* Skip the wide char sequence of the collating element.  */
4001 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4002 	  /* If we found the entry, return the sequence value.  */
4003 	  if (found)
4004 	    return *(uint32_t *) (extra + idx);
4005 	  /* Skip the collation sequence value.  */
4006 	  idx += sizeof (uint32_t);
4007 	}
4008       return UINT_MAX;
4009     }
4010 }
4011 # endif /* _LIBC */
4012 #endif /* RE_ENABLE_I18N */
4013 
4014 /* Check whether the node accepts the byte which is IDX-th
4015    byte of the INPUT.  */
4016 
4017 static int
4018 internal_function
4019 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4020 		   int idx)
4021 {
4022   unsigned char ch;
4023   ch = re_string_byte_at (&mctx->input, idx);
4024   switch (node->type)
4025     {
4026     case CHARACTER:
4027       if (node->opr.c != ch)
4028         return 0;
4029       break;
4030 
4031     case SIMPLE_BRACKET:
4032       if (!bitset_contain (node->opr.sbcset, ch))
4033         return 0;
4034       break;
4035 
4036 #ifdef RE_ENABLE_I18N
4037     case OP_UTF8_PERIOD:
4038       if (ch >= 0x80)
4039         return 0;
4040       /* FALLTHROUGH */
4041 #endif
4042     case OP_PERIOD:
4043       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4044 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4045 	return 0;
4046       break;
4047 
4048     default:
4049       return 0;
4050     }
4051 
4052   if (node->constraint)
4053     {
4054       /* The node has constraints.  Check whether the current context
4055 	 satisfies the constraints.  */
4056       unsigned int context = re_string_context_at (&mctx->input, idx,
4057 						   mctx->eflags);
4058       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4059 	return 0;
4060     }
4061 
4062   return 1;
4063 }
4064 
4065 /* Extend the buffers, if the buffers have run out.  */
4066 
4067 static reg_errcode_t
4068 internal_function
4069 extend_buffers (re_match_context_t *mctx)
4070 {
4071   reg_errcode_t ret;
4072   re_string_t *pstr = &mctx->input;
4073 
4074   /* Double the lengthes of the buffers.  */
4075   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4076   if (BE (ret != REG_NOERROR, 0))
4077     return ret;
4078 
4079   if (mctx->state_log != NULL)
4080     {
4081       /* And double the length of state_log.  */
4082       /* XXX We have no indication of the size of this buffer.  If this
4083 	 allocation fail we have no indication that the state_log array
4084 	 does not have the right size.  */
4085       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4086 					      pstr->bufs_len + 1);
4087       if (BE (new_array == NULL, 0))
4088 	return REG_ESPACE;
4089       mctx->state_log = new_array;
4090     }
4091 
4092   /* Then reconstruct the buffers.  */
4093   if (pstr->icase)
4094     {
4095 #ifdef RE_ENABLE_I18N
4096       if (pstr->mb_cur_max > 1)
4097 	{
4098 	  ret = build_wcs_upper_buffer (pstr);
4099 	  if (BE (ret != REG_NOERROR, 0))
4100 	    return ret;
4101 	}
4102       else
4103 #endif /* RE_ENABLE_I18N  */
4104 	build_upper_buffer (pstr);
4105     }
4106   else
4107     {
4108 #ifdef RE_ENABLE_I18N
4109       if (pstr->mb_cur_max > 1)
4110 	build_wcs_buffer (pstr);
4111       else
4112 #endif /* RE_ENABLE_I18N  */
4113 	{
4114 	  if (pstr->trans != NULL)
4115 	    re_string_translate_buffer (pstr);
4116 	}
4117     }
4118   return REG_NOERROR;
4119 }
4120 
4121 
4122 /* Functions for matching context.  */
4123 
4124 /* Initialize MCTX.  */
4125 
4126 static reg_errcode_t
4127 internal_function
4128 match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4129 {
4130   mctx->eflags = eflags;
4131   mctx->match_last = -1;
4132   if (n > 0)
4133     {
4134       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4135       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4136       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4137 	return REG_ESPACE;
4138     }
4139   /* Already zero-ed by the caller.
4140      else
4141        mctx->bkref_ents = NULL;
4142      mctx->nbkref_ents = 0;
4143      mctx->nsub_tops = 0;  */
4144   mctx->abkref_ents = n;
4145   mctx->max_mb_elem_len = 1;
4146   mctx->asub_tops = n;
4147   return REG_NOERROR;
4148 }
4149 
4150 /* Clean the entries which depend on the current input in MCTX.
4151    This function must be invoked when the matcher changes the start index
4152    of the input, or changes the input string.  */
4153 
4154 static void
4155 internal_function
4156 match_ctx_clean (re_match_context_t *mctx)
4157 {
4158   int st_idx;
4159   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4160     {
4161       int sl_idx;
4162       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4163       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4164 	{
4165 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4166 	  re_free (last->path.array);
4167 	  re_free (last);
4168 	}
4169       re_free (top->lasts);
4170       if (top->path)
4171 	{
4172 	  re_free (top->path->array);
4173 	  re_free (top->path);
4174 	}
4175       free (top);
4176     }
4177 
4178   mctx->nsub_tops = 0;
4179   mctx->nbkref_ents = 0;
4180 }
4181 
4182 /* Free all the memory associated with MCTX.  */
4183 
4184 static void
4185 internal_function
4186 match_ctx_free (re_match_context_t *mctx)
4187 {
4188   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4189   match_ctx_clean (mctx);
4190   re_free (mctx->sub_tops);
4191   re_free (mctx->bkref_ents);
4192 }
4193 
4194 /* Add a new backreference entry to MCTX.
4195    Note that we assume that caller never call this function with duplicate
4196    entry, and call with STR_IDX which isn't smaller than any existing entry.
4197 */
4198 
4199 static reg_errcode_t
4200 internal_function
4201 match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4202 		     int to)
4203 {
4204   if (mctx->nbkref_ents >= mctx->abkref_ents)
4205     {
4206       struct re_backref_cache_entry* new_entry;
4207       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4208 			      mctx->abkref_ents * 2);
4209       if (BE (new_entry == NULL, 0))
4210 	{
4211 	  re_free (mctx->bkref_ents);
4212 	  return REG_ESPACE;
4213 	}
4214       mctx->bkref_ents = new_entry;
4215       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4216 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4217       mctx->abkref_ents *= 2;
4218     }
4219   if (mctx->nbkref_ents > 0
4220       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4221     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4222 
4223   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4224   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4225   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4226   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4227 
4228   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4229      If bit N is clear, means that this entry won't epsilon-transition to
4230      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4231      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4232      such node.
4233 
4234      A backreference does not epsilon-transition unless it is empty, so set
4235      to all zeros if FROM != TO.  */
4236   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4237     = (from == to ? ~0 : 0);
4238 
4239   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4240   if (mctx->max_mb_elem_len < to - from)
4241     mctx->max_mb_elem_len = to - from;
4242   return REG_NOERROR;
4243 }
4244 
4245 /* Search for the first entry which has the same str_idx, or -1 if none is
4246    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4247 
4248 static int
4249 internal_function
4250 search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4251 {
4252   int left, right, mid, last;
4253   last = right = mctx->nbkref_ents;
4254   for (left = 0; left < right;)
4255     {
4256       mid = (left + right) / 2;
4257       if (mctx->bkref_ents[mid].str_idx < str_idx)
4258 	left = mid + 1;
4259       else
4260 	right = mid;
4261     }
4262   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4263     return left;
4264   else
4265     return -1;
4266 }
4267 
4268 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4269    at STR_IDX.  */
4270 
4271 static reg_errcode_t
4272 internal_function
4273 match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4274 {
4275 #ifdef DEBUG
4276   assert (mctx->sub_tops != NULL);
4277   assert (mctx->asub_tops > 0);
4278 #endif
4279   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4280     {
4281       int new_asub_tops = mctx->asub_tops * 2;
4282       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4283 						   re_sub_match_top_t *,
4284 						   new_asub_tops);
4285       if (BE (new_array == NULL, 0))
4286 	return REG_ESPACE;
4287       mctx->sub_tops = new_array;
4288       mctx->asub_tops = new_asub_tops;
4289     }
4290   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4291   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4292     return REG_ESPACE;
4293   mctx->sub_tops[mctx->nsub_tops]->node = node;
4294   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4295   return REG_NOERROR;
4296 }
4297 
4298 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4299    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4300 
4301 static re_sub_match_last_t *
4302 internal_function
4303 match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4304 {
4305   re_sub_match_last_t *new_entry;
4306   if (BE (subtop->nlasts == subtop->alasts, 0))
4307     {
4308       int new_alasts = 2 * subtop->alasts + 1;
4309       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4310 						    re_sub_match_last_t *,
4311 						    new_alasts);
4312       if (BE (new_array == NULL, 0))
4313 	return NULL;
4314       subtop->lasts = new_array;
4315       subtop->alasts = new_alasts;
4316     }
4317   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4318   if (BE (new_entry != NULL, 1))
4319     {
4320       subtop->lasts[subtop->nlasts] = new_entry;
4321       new_entry->node = node;
4322       new_entry->str_idx = str_idx;
4323       ++subtop->nlasts;
4324     }
4325   return new_entry;
4326 }
4327 
4328 static void
4329 internal_function
4330 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4331 	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4332 {
4333   sctx->sifted_states = sifted_sts;
4334   sctx->limited_states = limited_sts;
4335   sctx->last_node = last_node;
4336   sctx->last_str_idx = last_str_idx;
4337   re_node_set_init_empty (&sctx->limits);
4338 }
4339