xref: /haiku/src/system/libroot/posix/glibc/regex/regexec.c (revision caed67a8cba83913b9c21ac2b06ebc6bd1cb3111)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2018 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15 
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n);
22 static void match_ctx_clean (re_match_context_t *mctx);
23 static void match_ctx_free (re_match_context_t *cache);
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to);
26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28 					   Idx str_idx);
29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30 						    Idx node, Idx str_idx);
31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32 			   re_dfastate_t **limited_sts, Idx last_node,
33 			   Idx last_str_idx);
34 static reg_errcode_t re_search_internal (const regex_t *preg,
35 					 const char *string, Idx length,
36 					 Idx start, Idx last_start, Idx stop,
37 					 size_t nmatch, regmatch_t pmatch[],
38 					 int eflags);
39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40 				  const char *string1, Idx length1,
41 				  const char *string2, Idx length2,
42 				  Idx start, regoff_t range,
43 				  struct re_registers *regs,
44 				  Idx stop, bool ret_len);
45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46 				const char *string, Idx length, Idx start,
47 				regoff_t range, Idx stop,
48 				struct re_registers *regs,
49 				bool ret_len);
50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51                               Idx nregs, int regs_allocated);
52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54 			   Idx *p_match_first);
55 static Idx check_halt_state_context (const re_match_context_t *mctx,
56 				     const re_dfastate_t *state, Idx idx);
57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58 			 regmatch_t *prev_idx_match, Idx cur_node,
59 			 Idx cur_idx, Idx nmatch);
60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 				      Idx str_idx, Idx dest_node, Idx nregs,
62 				      regmatch_t *regs,
63 				      re_node_set *eps_via_nodes);
64 static reg_errcode_t set_regs (const regex_t *preg,
65 			       const re_match_context_t *mctx,
66 			       size_t nmatch, regmatch_t *pmatch,
67 			       bool fl_backtrack);
68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69 
70 #ifdef RE_ENABLE_I18N
71 static int sift_states_iter_mb (const re_match_context_t *mctx,
72 				re_sift_context_t *sctx,
73 				Idx node_idx, Idx str_idx, Idx max_str_idx);
74 #endif /* RE_ENABLE_I18N */
75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76 					   re_sift_context_t *sctx);
77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78 					  re_sift_context_t *sctx, Idx str_idx,
79 					  re_node_set *cur_dest);
80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81 					      re_sift_context_t *sctx,
82 					      Idx str_idx,
83 					      re_node_set *dest_nodes);
84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85 					    re_node_set *dest_nodes,
86 					    const re_node_set *candidates);
87 static bool check_dst_limits (const re_match_context_t *mctx,
88 			      const re_node_set *limits,
89 			      Idx dst_node, Idx dst_idx, Idx src_node,
90 			      Idx src_idx);
91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92 					int boundaries, Idx subexp_idx,
93 					Idx from_node, Idx bkref_idx);
94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95 				      Idx limit, Idx subexp_idx,
96 				      Idx node, Idx str_idx,
97 				      Idx bkref_idx);
98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99 					  re_node_set *dest_nodes,
100 					  const re_node_set *candidates,
101 					  re_node_set *limits,
102 					  struct re_backref_cache_entry *bkref_ents,
103 					  Idx str_idx);
104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105 					re_sift_context_t *sctx,
106 					Idx str_idx, const re_node_set *candidates);
107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108 					re_dfastate_t **dst,
109 					re_dfastate_t **src, Idx num);
110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111 					 re_match_context_t *mctx);
112 static re_dfastate_t *transit_state (reg_errcode_t *err,
113 				     re_match_context_t *mctx,
114 				     re_dfastate_t *state);
115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
116 					    re_match_context_t *mctx,
117 					    re_dfastate_t *next_state);
118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119 						re_node_set *cur_nodes,
120 						Idx str_idx);
121 #if 0
122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
123 					re_match_context_t *mctx,
124 					re_dfastate_t *pstate);
125 #endif
126 #ifdef RE_ENABLE_I18N
127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128 				       re_dfastate_t *pstate);
129 #endif /* RE_ENABLE_I18N */
130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131 					  const re_node_set *nodes);
132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
133 				 Idx bkref_node, Idx bkref_str_idx);
134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135 				     const re_sub_match_top_t *sub_top,
136 				     re_sub_match_last_t *sub_last,
137 				     Idx bkref_node, Idx bkref_str);
138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139 			     Idx subexp_idx, int type);
140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
141 				    state_array_t *path, Idx top_node,
142 				    Idx top_str, Idx last_node, Idx last_str,
143 				    int type);
144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145 						   Idx str_idx,
146 						   re_node_set *cur_nodes,
147 						   re_node_set *next_nodes);
148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149 					       re_node_set *cur_nodes,
150 					       Idx ex_subexp, int type);
151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152 						   re_node_set *dst_nodes,
153 						   Idx target, Idx ex_subexp,
154 						   int type);
155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156 					 re_node_set *cur_nodes, Idx cur_str,
157 					 Idx subexp_num, int type);
158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
159 #ifdef RE_ENABLE_I18N
160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161 				    const re_string_t *input, Idx idx);
162 # ifdef _LIBC
163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164 						   size_t name_len);
165 # endif /* _LIBC */
166 #endif /* RE_ENABLE_I18N */
167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168 				       const re_dfastate_t *state,
169 				       re_node_set *states_node,
170 				       bitset_t *states_ch);
171 static bool check_node_accept (const re_match_context_t *mctx,
172 			       const re_token_t *node, Idx idx);
173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
174 
175 /* Entry point for POSIX code.  */
176 
177 /* regexec searches for a given pattern, specified by PREG, in the
178    string STRING.
179 
180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
182    least NMATCH elements, and we set them to the offsets of the
183    corresponding matched substrings.
184 
185    EFLAGS specifies "execution flags" which affect matching: if
186    REG_NOTBOL is set, then ^ does not match at the beginning of the
187    string; if REG_NOTEOL is set, then $ does not match at the end.
188 
189    We return 0 if we find a match and REG_NOMATCH if not.  */
190 
191 int
192 regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
193 	 size_t nmatch, regmatch_t pmatch[], int eflags)
194 {
195   reg_errcode_t err;
196   Idx start, length;
197   re_dfa_t *dfa = preg->buffer;
198 
199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
200     return REG_BADPAT;
201 
202   if (eflags & REG_STARTEND)
203     {
204       start = pmatch[0].rm_so;
205       length = pmatch[0].rm_eo;
206     }
207   else
208     {
209       start = 0;
210       length = strlen (string);
211     }
212 
213   lock_lock (dfa->lock);
214   if (preg->no_sub)
215     err = re_search_internal (preg, string, length, start, length,
216 			      length, 0, NULL, eflags);
217   else
218     err = re_search_internal (preg, string, length, start, length,
219 			      length, nmatch, pmatch, eflags);
220   lock_unlock (dfa->lock);
221   return err != REG_NOERROR;
222 }
223 
224 #ifdef _LIBC
225 libc_hidden_def (__regexec)
226 
227 # include <shlib-compat.h>
228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229 
230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231 __typeof__ (__regexec) __compat_regexec;
232 
233 int
234 attribute_compat_text_section
235 __compat_regexec (const regex_t *_Restrict_ preg,
236 		  const char *_Restrict_ string, size_t nmatch,
237 		  regmatch_t pmatch[], int eflags)
238 {
239   return regexec (preg, string, nmatch, pmatch,
240 		  eflags & (REG_NOTBOL | REG_NOTEOL));
241 }
242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243 # endif
244 #endif
245 
246 /* Entry points for GNU code.  */
247 
248 /* re_match, re_search, re_match_2, re_search_2
249 
250    The former two functions operate on STRING with length LENGTH,
251    while the later two operate on concatenation of STRING1 and STRING2
252    with lengths LENGTH1 and LENGTH2, respectively.
253 
254    re_match() matches the compiled pattern in BUFP against the string,
255    starting at index START.
256 
257    re_search() first tries matching at index START, then it tries to match
258    starting from index START + 1, and so on.  The last start position tried
259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
260    way as re_match().)
261 
262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263    the first STOP characters of the concatenation of the strings should be
264    concerned.
265 
266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
268    computed relative to the concatenation, not relative to the individual
269    strings.)
270 
271    On success, re_match* functions return the length of the match, re_search*
272    return the position of the start of the match.  Return value -1 means no
273    match was found and -2 indicates an internal error.  */
274 
275 regoff_t
276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277 	  Idx start, struct re_registers *regs)
278 {
279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
280 }
281 #ifdef _LIBC
282 weak_alias (__re_match, re_match)
283 #endif
284 
285 regoff_t
286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287 	   Idx start, regoff_t range, struct re_registers *regs)
288 {
289   return re_search_stub (bufp, string, length, start, range, length, regs,
290 			 false);
291 }
292 #ifdef _LIBC
293 weak_alias (__re_search, re_search)
294 #endif
295 
296 regoff_t
297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298 	    const char *string2, Idx length2, Idx start,
299 	    struct re_registers *regs, Idx stop)
300 {
301   return re_search_2_stub (bufp, string1, length1, string2, length2,
302 			   start, 0, regs, stop, true);
303 }
304 #ifdef _LIBC
305 weak_alias (__re_match_2, re_match_2)
306 #endif
307 
308 regoff_t
309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310 	     const char *string2, Idx length2, Idx start, regoff_t range,
311 	     struct re_registers *regs, Idx stop)
312 {
313   return re_search_2_stub (bufp, string1, length1, string2, length2,
314 			   start, range, regs, stop, false);
315 }
316 #ifdef _LIBC
317 weak_alias (__re_search_2, re_search_2)
318 #endif
319 
320 static regoff_t
321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322 		  Idx length1, const char *string2, Idx length2, Idx start,
323 		  regoff_t range, struct re_registers *regs,
324 		  Idx stop, bool ret_len)
325 {
326   const char *str;
327   regoff_t rval;
328   Idx len;
329   char *s = NULL;
330 
331   if (BE ((length1 < 0 || length2 < 0 || stop < 0
332            || INT_ADD_WRAPV (length1, length2, &len)),
333           0))
334     return -2;
335 
336   /* Concatenate the strings.  */
337   if (length2 > 0)
338     if (length1 > 0)
339       {
340 	s = re_malloc (char, len);
341 
342 	if (BE (s == NULL, 0))
343 	  return -2;
344 #ifdef _LIBC
345 	memcpy (__mempcpy (s, string1, length1), string2, length2);
346 #else
347 	memcpy (s, string1, length1);
348 	memcpy (s + length1, string2, length2);
349 #endif
350 	str = s;
351       }
352     else
353       str = string2;
354   else
355     str = string1;
356 
357   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
358 			 ret_len);
359   re_free (s);
360   return rval;
361 }
362 
363 /* The parameters have the same meaning as those of re_search.
364    Additional parameters:
365    If RET_LEN is true the length of the match is returned (re_match style);
366    otherwise the position of the match is returned.  */
367 
368 static regoff_t
369 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
370 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
371 		bool ret_len)
372 {
373   reg_errcode_t result;
374   regmatch_t *pmatch;
375   Idx nregs;
376   regoff_t rval;
377   int eflags = 0;
378   re_dfa_t *dfa = bufp->buffer;
379   Idx last_start = start + range;
380 
381   /* Check for out-of-range.  */
382   if (BE (start < 0 || start > length, 0))
383     return -1;
384   if (BE (length < last_start || (0 <= range && last_start < start), 0))
385     last_start = length;
386   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
387     last_start = 0;
388 
389   lock_lock (dfa->lock);
390 
391   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
392   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
393 
394   /* Compile fastmap if we haven't yet.  */
395   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
396     re_compile_fastmap (bufp);
397 
398   if (BE (bufp->no_sub, 0))
399     regs = NULL;
400 
401   /* We need at least 1 register.  */
402   if (regs == NULL)
403     nregs = 1;
404   else if (BE (bufp->regs_allocated == REGS_FIXED
405 	       && regs->num_regs <= bufp->re_nsub, 0))
406     {
407       nregs = regs->num_regs;
408       if (BE (nregs < 1, 0))
409 	{
410 	  /* Nothing can be copied to regs.  */
411 	  regs = NULL;
412 	  nregs = 1;
413 	}
414     }
415   else
416     nregs = bufp->re_nsub + 1;
417   pmatch = re_malloc (regmatch_t, nregs);
418   if (BE (pmatch == NULL, 0))
419     {
420       rval = -2;
421       goto out;
422     }
423 
424   result = re_search_internal (bufp, string, length, start, last_start, stop,
425 			       nregs, pmatch, eflags);
426 
427   rval = 0;
428 
429   /* I hope we needn't fill their regs with -1's when no match was found.  */
430   if (result != REG_NOERROR)
431     rval = result == REG_NOMATCH ? -1 : -2;
432   else if (regs != NULL)
433     {
434       /* If caller wants register contents data back, copy them.  */
435       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
436 					   bufp->regs_allocated);
437       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
438 	rval = -2;
439     }
440 
441   if (BE (rval == 0, 1))
442     {
443       if (ret_len)
444 	{
445 	  assert (pmatch[0].rm_so == start);
446 	  rval = pmatch[0].rm_eo - start;
447 	}
448       else
449 	rval = pmatch[0].rm_so;
450     }
451   re_free (pmatch);
452  out:
453   lock_unlock (dfa->lock);
454   return rval;
455 }
456 
457 static unsigned
458 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
459 	      int regs_allocated)
460 {
461   int rval = REGS_REALLOCATE;
462   Idx i;
463   Idx need_regs = nregs + 1;
464   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
465      uses.  */
466 
467   /* Have the register data arrays been allocated?  */
468   if (regs_allocated == REGS_UNALLOCATED)
469     { /* No.  So allocate them with malloc.  */
470       regs->start = re_malloc (regoff_t, need_regs);
471       if (BE (regs->start == NULL, 0))
472 	return REGS_UNALLOCATED;
473       regs->end = re_malloc (regoff_t, need_regs);
474       if (BE (regs->end == NULL, 0))
475 	{
476 	  re_free (regs->start);
477 	  return REGS_UNALLOCATED;
478 	}
479       regs->num_regs = need_regs;
480     }
481   else if (regs_allocated == REGS_REALLOCATE)
482     { /* Yes.  If we need more elements than were already
483 	 allocated, reallocate them.  If we need fewer, just
484 	 leave it alone.  */
485       if (BE (need_regs > regs->num_regs, 0))
486 	{
487 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
488 	  regoff_t *new_end;
489 	  if (BE (new_start == NULL, 0))
490 	    return REGS_UNALLOCATED;
491 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
492 	  if (BE (new_end == NULL, 0))
493 	    {
494 	      re_free (new_start);
495 	      return REGS_UNALLOCATED;
496 	    }
497 	  regs->start = new_start;
498 	  regs->end = new_end;
499 	  regs->num_regs = need_regs;
500 	}
501     }
502   else
503     {
504       assert (regs_allocated == REGS_FIXED);
505       /* This function may not be called with REGS_FIXED and nregs too big.  */
506       assert (regs->num_regs >= nregs);
507       rval = REGS_FIXED;
508     }
509 
510   /* Copy the regs.  */
511   for (i = 0; i < nregs; ++i)
512     {
513       regs->start[i] = pmatch[i].rm_so;
514       regs->end[i] = pmatch[i].rm_eo;
515     }
516   for ( ; i < regs->num_regs; ++i)
517     regs->start[i] = regs->end[i] = -1;
518 
519   return rval;
520 }
521 
522 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
523    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
524    this memory for recording register information.  STARTS and ENDS
525    must be allocated using the malloc library routine, and must each
526    be at least NUM_REGS * sizeof (regoff_t) bytes long.
527 
528    If NUM_REGS == 0, then subsequent matches should allocate their own
529    register data.
530 
531    Unless this function is called, the first search or match using
532    PATTERN_BUFFER will allocate its own register data, without
533    freeing the old data.  */
534 
535 void
536 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
537 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
538 {
539   if (num_regs)
540     {
541       bufp->regs_allocated = REGS_REALLOCATE;
542       regs->num_regs = num_regs;
543       regs->start = starts;
544       regs->end = ends;
545     }
546   else
547     {
548       bufp->regs_allocated = REGS_UNALLOCATED;
549       regs->num_regs = 0;
550       regs->start = regs->end = NULL;
551     }
552 }
553 #ifdef _LIBC
554 weak_alias (__re_set_registers, re_set_registers)
555 #endif
556 
557 /* Entry points compatible with 4.2 BSD regex library.  We don't define
558    them unless specifically requested.  */
559 
560 #if defined _REGEX_RE_COMP || defined _LIBC
561 int
562 # ifdef _LIBC
563 weak_function
564 # endif
565 re_exec (const char *s)
566 {
567   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
568 }
569 #endif /* _REGEX_RE_COMP */
570 
571 /* Internal entry point.  */
572 
573 /* Searches for a compiled pattern PREG in the string STRING, whose
574    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
575    meaning as with regexec.  LAST_START is START + RANGE, where
576    START and RANGE have the same meaning as with re_search.
577    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
578    otherwise return the error code.
579    Note: We assume front end functions already check ranges.
580    (0 <= LAST_START && LAST_START <= LENGTH)  */
581 
582 static reg_errcode_t
583 __attribute_warn_unused_result__
584 re_search_internal (const regex_t *preg, const char *string, Idx length,
585 		    Idx start, Idx last_start, Idx stop, size_t nmatch,
586 		    regmatch_t pmatch[], int eflags)
587 {
588   reg_errcode_t err;
589   const re_dfa_t *dfa = preg->buffer;
590   Idx left_lim, right_lim;
591   int incr;
592   bool fl_longest_match;
593   int match_kind;
594   Idx match_first;
595   Idx match_last = -1;
596   Idx extra_nmatch;
597   bool sb;
598   int ch;
599 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
600   re_match_context_t mctx = { .dfa = dfa };
601 #else
602   re_match_context_t mctx;
603 #endif
604   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
605 		    && start != last_start && !preg->can_be_null)
606 		   ? preg->fastmap : NULL);
607   RE_TRANSLATE_TYPE t = preg->translate;
608 
609 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
610   memset (&mctx, '\0', sizeof (re_match_context_t));
611   mctx.dfa = dfa;
612 #endif
613 
614   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
615   nmatch -= extra_nmatch;
616 
617   /* Check if the DFA haven't been compiled.  */
618   if (BE (preg->used == 0 || dfa->init_state == NULL
619 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
620 	  || dfa->init_state_begbuf == NULL, 0))
621     return REG_NOMATCH;
622 
623 #ifdef DEBUG
624   /* We assume front-end functions already check them.  */
625   assert (0 <= last_start && last_start <= length);
626 #endif
627 
628   /* If initial states with non-begbuf contexts have no elements,
629      the regex must be anchored.  If preg->newline_anchor is set,
630      we'll never use init_state_nl, so do not check it.  */
631   if (dfa->init_state->nodes.nelem == 0
632       && dfa->init_state_word->nodes.nelem == 0
633       && (dfa->init_state_nl->nodes.nelem == 0
634 	  || !preg->newline_anchor))
635     {
636       if (start != 0 && last_start != 0)
637         return REG_NOMATCH;
638       start = last_start = 0;
639     }
640 
641   /* We must check the longest matching, if nmatch > 0.  */
642   fl_longest_match = (nmatch != 0 || dfa->nbackref);
643 
644   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
645 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
646 			    dfa);
647   if (BE (err != REG_NOERROR, 0))
648     goto free_return;
649   mctx.input.stop = stop;
650   mctx.input.raw_stop = stop;
651   mctx.input.newline_anchor = preg->newline_anchor;
652 
653   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
654   if (BE (err != REG_NOERROR, 0))
655     goto free_return;
656 
657   /* We will log all the DFA states through which the dfa pass,
658      if nmatch > 1, or this dfa has "multibyte node", which is a
659      back-reference or a node which can accept multibyte character or
660      multi character collating element.  */
661   if (nmatch > 1 || dfa->has_mb_node)
662     {
663       /* Avoid overflow.  */
664       if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
665                <= mctx.input.bufs_len), 0))
666 	{
667 	  err = REG_ESPACE;
668 	  goto free_return;
669 	}
670 
671       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
672       if (BE (mctx.state_log == NULL, 0))
673 	{
674 	  err = REG_ESPACE;
675 	  goto free_return;
676 	}
677     }
678   else
679     mctx.state_log = NULL;
680 
681   match_first = start;
682   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
683 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
684 
685   /* Check incrementally whether the input string matches.  */
686   incr = (last_start < start) ? -1 : 1;
687   left_lim = (last_start < start) ? last_start : start;
688   right_lim = (last_start < start) ? start : last_start;
689   sb = dfa->mb_cur_max == 1;
690   match_kind =
691     (fastmap
692      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
693 	| (start <= last_start ? 2 : 0)
694 	| (t != NULL ? 1 : 0))
695      : 8);
696 
697   for (;; match_first += incr)
698     {
699       err = REG_NOMATCH;
700       if (match_first < left_lim || right_lim < match_first)
701 	goto free_return;
702 
703       /* Advance as rapidly as possible through the string, until we
704 	 find a plausible place to start matching.  This may be done
705 	 with varying efficiency, so there are various possibilities:
706 	 only the most common of them are specialized, in order to
707 	 save on code size.  We use a switch statement for speed.  */
708       switch (match_kind)
709 	{
710 	case 8:
711 	  /* No fastmap.  */
712 	  break;
713 
714 	case 7:
715 	  /* Fastmap with single-byte translation, match forward.  */
716 	  while (BE (match_first < right_lim, 1)
717 		 && !fastmap[t[(unsigned char) string[match_first]]])
718 	    ++match_first;
719 	  goto forward_match_found_start_or_reached_end;
720 
721 	case 6:
722 	  /* Fastmap without translation, match forward.  */
723 	  while (BE (match_first < right_lim, 1)
724 		 && !fastmap[(unsigned char) string[match_first]])
725 	    ++match_first;
726 
727 	forward_match_found_start_or_reached_end:
728 	  if (BE (match_first == right_lim, 0))
729 	    {
730 	      ch = match_first >= length
731 		       ? 0 : (unsigned char) string[match_first];
732 	      if (!fastmap[t ? t[ch] : ch])
733 		goto free_return;
734 	    }
735 	  break;
736 
737 	case 4:
738 	case 5:
739 	  /* Fastmap without multi-byte translation, match backwards.  */
740 	  while (match_first >= left_lim)
741 	    {
742 	      ch = match_first >= length
743 		       ? 0 : (unsigned char) string[match_first];
744 	      if (fastmap[t ? t[ch] : ch])
745 		break;
746 	      --match_first;
747 	    }
748 	  if (match_first < left_lim)
749 	    goto free_return;
750 	  break;
751 
752 	default:
753 	  /* In this case, we can't determine easily the current byte,
754 	     since it might be a component byte of a multibyte
755 	     character.  Then we use the constructed buffer instead.  */
756 	  for (;;)
757 	    {
758 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
759 		 buffers.  */
760 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
761 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
762 		{
763 		  err = re_string_reconstruct (&mctx.input, match_first,
764 					       eflags);
765 		  if (BE (err != REG_NOERROR, 0))
766 		    goto free_return;
767 
768 		  offset = match_first - mctx.input.raw_mbs_idx;
769 		}
770 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
771 		 Note that MATCH_FIRST must not be smaller than 0.  */
772 	      ch = (match_first >= length
773 		    ? 0 : re_string_byte_at (&mctx.input, offset));
774 	      if (fastmap[ch])
775 		break;
776 	      match_first += incr;
777 	      if (match_first < left_lim || match_first > right_lim)
778 		{
779 		  err = REG_NOMATCH;
780 		  goto free_return;
781 		}
782 	    }
783 	  break;
784 	}
785 
786       /* Reconstruct the buffers so that the matcher can assume that
787 	 the matching starts from the beginning of the buffer.  */
788       err = re_string_reconstruct (&mctx.input, match_first, eflags);
789       if (BE (err != REG_NOERROR, 0))
790 	goto free_return;
791 
792 #ifdef RE_ENABLE_I18N
793      /* Don't consider this char as a possible match start if it part,
794 	yet isn't the head, of a multibyte character.  */
795       if (!sb && !re_string_first_byte (&mctx.input, 0))
796 	continue;
797 #endif
798 
799       /* It seems to be appropriate one, then use the matcher.  */
800       /* We assume that the matching starts from 0.  */
801       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
802       match_last = check_matching (&mctx, fl_longest_match,
803 				   start <= last_start ? &match_first : NULL);
804       if (match_last != -1)
805 	{
806 	  if (BE (match_last == -2, 0))
807 	    {
808 	      err = REG_ESPACE;
809 	      goto free_return;
810 	    }
811 	  else
812 	    {
813 	      mctx.match_last = match_last;
814 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
815 		{
816 		  re_dfastate_t *pstate = mctx.state_log[match_last];
817 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
818 							     match_last);
819 		}
820 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
821 		  || dfa->nbackref)
822 		{
823 		  err = prune_impossible_nodes (&mctx);
824 		  if (err == REG_NOERROR)
825 		    break;
826 		  if (BE (err != REG_NOMATCH, 0))
827 		    goto free_return;
828 		  match_last = -1;
829 		}
830 	      else
831 		break; /* We found a match.  */
832 	    }
833 	}
834 
835       match_ctx_clean (&mctx);
836     }
837 
838 #ifdef DEBUG
839   assert (match_last != -1);
840   assert (err == REG_NOERROR);
841 #endif
842 
843   /* Set pmatch[] if we need.  */
844   if (nmatch > 0)
845     {
846       Idx reg_idx;
847 
848       /* Initialize registers.  */
849       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
850 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
851 
852       /* Set the points where matching start/end.  */
853       pmatch[0].rm_so = 0;
854       pmatch[0].rm_eo = mctx.match_last;
855       /* FIXME: This function should fail if mctx.match_last exceeds
856 	 the maximum possible regoff_t value.  We need a new error
857 	 code REG_OVERFLOW.  */
858 
859       if (!preg->no_sub && nmatch > 1)
860 	{
861 	  err = set_regs (preg, &mctx, nmatch, pmatch,
862 			  dfa->has_plural_match && dfa->nbackref > 0);
863 	  if (BE (err != REG_NOERROR, 0))
864 	    goto free_return;
865 	}
866 
867       /* At last, add the offset to each register, since we slid
868 	 the buffers so that we could assume that the matching starts
869 	 from 0.  */
870       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
871 	if (pmatch[reg_idx].rm_so != -1)
872 	  {
873 #ifdef RE_ENABLE_I18N
874 	    if (BE (mctx.input.offsets_needed != 0, 0))
875 	      {
876 		pmatch[reg_idx].rm_so =
877 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
878 		   ? mctx.input.valid_raw_len
879 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
880 		pmatch[reg_idx].rm_eo =
881 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
882 		   ? mctx.input.valid_raw_len
883 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
884 	      }
885 #else
886 	    assert (mctx.input.offsets_needed == 0);
887 #endif
888 	    pmatch[reg_idx].rm_so += match_first;
889 	    pmatch[reg_idx].rm_eo += match_first;
890 	  }
891       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
892 	{
893 	  pmatch[nmatch + reg_idx].rm_so = -1;
894 	  pmatch[nmatch + reg_idx].rm_eo = -1;
895 	}
896 
897       if (dfa->subexp_map)
898 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
899 	  if (dfa->subexp_map[reg_idx] != reg_idx)
900 	    {
901 	      pmatch[reg_idx + 1].rm_so
902 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
903 	      pmatch[reg_idx + 1].rm_eo
904 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
905 	    }
906     }
907 
908  free_return:
909   re_free (mctx.state_log);
910   if (dfa->nbackref)
911     match_ctx_free (&mctx);
912   re_string_destruct (&mctx.input);
913   return err;
914 }
915 
916 static reg_errcode_t
917 __attribute_warn_unused_result__
918 prune_impossible_nodes (re_match_context_t *mctx)
919 {
920   const re_dfa_t *const dfa = mctx->dfa;
921   Idx halt_node, match_last;
922   reg_errcode_t ret;
923   re_dfastate_t **sifted_states;
924   re_dfastate_t **lim_states = NULL;
925   re_sift_context_t sctx;
926 #ifdef DEBUG
927   assert (mctx->state_log != NULL);
928 #endif
929   match_last = mctx->match_last;
930   halt_node = mctx->last_node;
931 
932   /* Avoid overflow.  */
933   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
934     return REG_ESPACE;
935 
936   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
937   if (BE (sifted_states == NULL, 0))
938     {
939       ret = REG_ESPACE;
940       goto free_return;
941     }
942   if (dfa->nbackref)
943     {
944       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
945       if (BE (lim_states == NULL, 0))
946 	{
947 	  ret = REG_ESPACE;
948 	  goto free_return;
949 	}
950       while (1)
951 	{
952 	  memset (lim_states, '\0',
953 		  sizeof (re_dfastate_t *) * (match_last + 1));
954 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
955 			 match_last);
956 	  ret = sift_states_backward (mctx, &sctx);
957 	  re_node_set_free (&sctx.limits);
958 	  if (BE (ret != REG_NOERROR, 0))
959 	      goto free_return;
960 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
961 	    break;
962 	  do
963 	    {
964 	      --match_last;
965 	      if (match_last < 0)
966 		{
967 		  ret = REG_NOMATCH;
968 		  goto free_return;
969 		}
970 	    } while (mctx->state_log[match_last] == NULL
971 		     || !mctx->state_log[match_last]->halt);
972 	  halt_node = check_halt_state_context (mctx,
973 						mctx->state_log[match_last],
974 						match_last);
975 	}
976       ret = merge_state_array (dfa, sifted_states, lim_states,
977 			       match_last + 1);
978       re_free (lim_states);
979       lim_states = NULL;
980       if (BE (ret != REG_NOERROR, 0))
981 	goto free_return;
982     }
983   else
984     {
985       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
986       ret = sift_states_backward (mctx, &sctx);
987       re_node_set_free (&sctx.limits);
988       if (BE (ret != REG_NOERROR, 0))
989 	goto free_return;
990       if (sifted_states[0] == NULL)
991 	{
992 	  ret = REG_NOMATCH;
993 	  goto free_return;
994 	}
995     }
996   re_free (mctx->state_log);
997   mctx->state_log = sifted_states;
998   sifted_states = NULL;
999   mctx->last_node = halt_node;
1000   mctx->match_last = match_last;
1001   ret = REG_NOERROR;
1002  free_return:
1003   re_free (sifted_states);
1004   re_free (lim_states);
1005   return ret;
1006 }
1007 
1008 /* Acquire an initial state and return it.
1009    We must select appropriate initial state depending on the context,
1010    since initial states may have constraints like "\<", "^", etc..  */
1011 
1012 static inline re_dfastate_t *
1013 __attribute__ ((always_inline))
1014 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1015 			    Idx idx)
1016 {
1017   const re_dfa_t *const dfa = mctx->dfa;
1018   if (dfa->init_state->has_constraint)
1019     {
1020       unsigned int context;
1021       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1022       if (IS_WORD_CONTEXT (context))
1023 	return dfa->init_state_word;
1024       else if (IS_ORDINARY_CONTEXT (context))
1025 	return dfa->init_state;
1026       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1027 	return dfa->init_state_begbuf;
1028       else if (IS_NEWLINE_CONTEXT (context))
1029 	return dfa->init_state_nl;
1030       else if (IS_BEGBUF_CONTEXT (context))
1031 	{
1032 	  /* It is relatively rare case, then calculate on demand.  */
1033 	  return re_acquire_state_context (err, dfa,
1034 					   dfa->init_state->entrance_nodes,
1035 					   context);
1036 	}
1037       else
1038 	/* Must not happen?  */
1039 	return dfa->init_state;
1040     }
1041   else
1042     return dfa->init_state;
1043 }
1044 
1045 /* Check whether the regular expression match input string INPUT or not,
1046    and return the index where the matching end.  Return -1 if
1047    there is no match, and return -2 in case of an error.
1048    FL_LONGEST_MATCH means we want the POSIX longest matching.
1049    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1050    next place where we may want to try matching.
1051    Note that the matcher assumes that the matching starts from the current
1052    index of the buffer.  */
1053 
1054 static Idx
1055 __attribute_warn_unused_result__
1056 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1057 		Idx *p_match_first)
1058 {
1059   const re_dfa_t *const dfa = mctx->dfa;
1060   reg_errcode_t err;
1061   Idx match = 0;
1062   Idx match_last = -1;
1063   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1064   re_dfastate_t *cur_state;
1065   bool at_init_state = p_match_first != NULL;
1066   Idx next_start_idx = cur_str_idx;
1067 
1068   err = REG_NOERROR;
1069   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1070   /* An initial state must not be NULL (invalid).  */
1071   if (BE (cur_state == NULL, 0))
1072     {
1073       assert (err == REG_ESPACE);
1074       return -2;
1075     }
1076 
1077   if (mctx->state_log != NULL)
1078     {
1079       mctx->state_log[cur_str_idx] = cur_state;
1080 
1081       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1082 	 later.  E.g. Processing back references.  */
1083       if (BE (dfa->nbackref, 0))
1084 	{
1085 	  at_init_state = false;
1086 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1087 	  if (BE (err != REG_NOERROR, 0))
1088 	    return err;
1089 
1090 	  if (cur_state->has_backref)
1091 	    {
1092 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1093 	      if (BE (err != REG_NOERROR, 0))
1094 		return err;
1095 	    }
1096 	}
1097     }
1098 
1099   /* If the RE accepts NULL string.  */
1100   if (BE (cur_state->halt, 0))
1101     {
1102       if (!cur_state->has_constraint
1103 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1104 	{
1105 	  if (!fl_longest_match)
1106 	    return cur_str_idx;
1107 	  else
1108 	    {
1109 	      match_last = cur_str_idx;
1110 	      match = 1;
1111 	    }
1112 	}
1113     }
1114 
1115   while (!re_string_eoi (&mctx->input))
1116     {
1117       re_dfastate_t *old_state = cur_state;
1118       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1119 
1120       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1121 	   && mctx->input.bufs_len < mctx->input.len)
1122 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1123 	      && mctx->input.valid_len < mctx->input.len))
1124 	{
1125 	  err = extend_buffers (mctx, next_char_idx + 1);
1126 	  if (BE (err != REG_NOERROR, 0))
1127 	    {
1128 	      assert (err == REG_ESPACE);
1129 	      return -2;
1130 	    }
1131 	}
1132 
1133       cur_state = transit_state (&err, mctx, cur_state);
1134       if (mctx->state_log != NULL)
1135 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1136 
1137       if (cur_state == NULL)
1138 	{
1139 	  /* Reached the invalid state or an error.  Try to recover a valid
1140 	     state using the state log, if available and if we have not
1141 	     already found a valid (even if not the longest) match.  */
1142 	  if (BE (err != REG_NOERROR, 0))
1143 	    return -2;
1144 
1145 	  if (mctx->state_log == NULL
1146 	      || (match && !fl_longest_match)
1147 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1148 	    break;
1149 	}
1150 
1151       if (BE (at_init_state, 0))
1152 	{
1153 	  if (old_state == cur_state)
1154 	    next_start_idx = next_char_idx;
1155 	  else
1156 	    at_init_state = false;
1157 	}
1158 
1159       if (cur_state->halt)
1160 	{
1161 	  /* Reached a halt state.
1162 	     Check the halt state can satisfy the current context.  */
1163 	  if (!cur_state->has_constraint
1164 	      || check_halt_state_context (mctx, cur_state,
1165 					   re_string_cur_idx (&mctx->input)))
1166 	    {
1167 	      /* We found an appropriate halt state.  */
1168 	      match_last = re_string_cur_idx (&mctx->input);
1169 	      match = 1;
1170 
1171 	      /* We found a match, do not modify match_first below.  */
1172 	      p_match_first = NULL;
1173 	      if (!fl_longest_match)
1174 		break;
1175 	    }
1176 	}
1177     }
1178 
1179   if (p_match_first)
1180     *p_match_first += next_start_idx;
1181 
1182   return match_last;
1183 }
1184 
1185 /* Check NODE match the current context.  */
1186 
1187 static bool
1188 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1189 {
1190   re_token_type_t type = dfa->nodes[node].type;
1191   unsigned int constraint = dfa->nodes[node].constraint;
1192   if (type != END_OF_RE)
1193     return false;
1194   if (!constraint)
1195     return true;
1196   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1197     return false;
1198   return true;
1199 }
1200 
1201 /* Check the halt state STATE match the current context.
1202    Return 0 if not match, if the node, STATE has, is a halt node and
1203    match the context, return the node.  */
1204 
1205 static Idx
1206 check_halt_state_context (const re_match_context_t *mctx,
1207 			  const re_dfastate_t *state, Idx idx)
1208 {
1209   Idx i;
1210   unsigned int context;
1211 #ifdef DEBUG
1212   assert (state->halt);
1213 #endif
1214   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1215   for (i = 0; i < state->nodes.nelem; ++i)
1216     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1217       return state->nodes.elems[i];
1218   return 0;
1219 }
1220 
1221 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1222    corresponding to the DFA).
1223    Return the destination node, and update EPS_VIA_NODES;
1224    return -1 in case of errors.  */
1225 
1226 static Idx
1227 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1228 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1229 		   struct re_fail_stack_t *fs)
1230 {
1231   const re_dfa_t *const dfa = mctx->dfa;
1232   Idx i;
1233   bool ok;
1234   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1235     {
1236       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1237       re_node_set *edests = &dfa->edests[node];
1238       Idx dest_node;
1239       ok = re_node_set_insert (eps_via_nodes, node);
1240       if (BE (! ok, 0))
1241 	return -2;
1242       /* Pick up a valid destination, or return -1 if none
1243 	 is found.  */
1244       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1245 	{
1246 	  Idx candidate = edests->elems[i];
1247 	  if (!re_node_set_contains (cur_nodes, candidate))
1248 	    continue;
1249           if (dest_node == -1)
1250 	    dest_node = candidate;
1251 
1252 	  else
1253 	    {
1254 	      /* In order to avoid infinite loop like "(a*)*", return the second
1255 		 epsilon-transition if the first was already considered.  */
1256 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1257 		return candidate;
1258 
1259 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1260 	      else if (fs != NULL
1261 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1262 					   eps_via_nodes))
1263 		return -2;
1264 
1265 	      /* We know we are going to exit.  */
1266 	      break;
1267 	    }
1268 	}
1269       return dest_node;
1270     }
1271   else
1272     {
1273       Idx naccepted = 0;
1274       re_token_type_t type = dfa->nodes[node].type;
1275 
1276 #ifdef RE_ENABLE_I18N
1277       if (dfa->nodes[node].accept_mb)
1278 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1279       else
1280 #endif /* RE_ENABLE_I18N */
1281       if (type == OP_BACK_REF)
1282 	{
1283 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1284 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1285 	  if (fs != NULL)
1286 	    {
1287 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1288 		return -1;
1289 	      else if (naccepted)
1290 		{
1291 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1292 		  if (mctx->input.valid_len - *pidx < naccepted
1293 		      || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1294 				  naccepted)
1295 			  != 0))
1296 		    return -1;
1297 		}
1298 	    }
1299 
1300 	  if (naccepted == 0)
1301 	    {
1302 	      Idx dest_node;
1303 	      ok = re_node_set_insert (eps_via_nodes, node);
1304 	      if (BE (! ok, 0))
1305 		return -2;
1306 	      dest_node = dfa->edests[node].elems[0];
1307 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1308 					dest_node))
1309 		return dest_node;
1310 	    }
1311 	}
1312 
1313       if (naccepted != 0
1314 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1315 	{
1316 	  Idx dest_node = dfa->nexts[node];
1317 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1318 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1319 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1320 					       dest_node)))
1321 	    return -1;
1322 	  re_node_set_empty (eps_via_nodes);
1323 	  return dest_node;
1324 	}
1325     }
1326   return -1;
1327 }
1328 
1329 static reg_errcode_t
1330 __attribute_warn_unused_result__
1331 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1332 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1333 {
1334   reg_errcode_t err;
1335   Idx num = fs->num++;
1336   if (fs->num == fs->alloc)
1337     {
1338       struct re_fail_stack_ent_t *new_array;
1339       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1340                               fs->alloc * 2);
1341       if (new_array == NULL)
1342 	return REG_ESPACE;
1343       fs->alloc *= 2;
1344       fs->stack = new_array;
1345     }
1346   fs->stack[num].idx = str_idx;
1347   fs->stack[num].node = dest_node;
1348   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1349   if (fs->stack[num].regs == NULL)
1350     return REG_ESPACE;
1351   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1352   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1353   return err;
1354 }
1355 
1356 static Idx
1357 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1358 		regmatch_t *regs, re_node_set *eps_via_nodes)
1359 {
1360   Idx num = --fs->num;
1361   assert (num >= 0);
1362   *pidx = fs->stack[num].idx;
1363   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1364   re_node_set_free (eps_via_nodes);
1365   re_free (fs->stack[num].regs);
1366   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1367   return fs->stack[num].node;
1368 }
1369 
1370 /* Set the positions where the subexpressions are starts/ends to registers
1371    PMATCH.
1372    Note: We assume that pmatch[0] is already set, and
1373    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1374 
1375 static reg_errcode_t
1376 __attribute_warn_unused_result__
1377 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1378 	  regmatch_t *pmatch, bool fl_backtrack)
1379 {
1380   const re_dfa_t *dfa = preg->buffer;
1381   Idx idx, cur_node;
1382   re_node_set eps_via_nodes;
1383   struct re_fail_stack_t *fs;
1384   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1385   regmatch_t *prev_idx_match;
1386   bool prev_idx_match_malloced = false;
1387 
1388 #ifdef DEBUG
1389   assert (nmatch > 1);
1390   assert (mctx->state_log != NULL);
1391 #endif
1392   if (fl_backtrack)
1393     {
1394       fs = &fs_body;
1395       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1396       if (fs->stack == NULL)
1397 	return REG_ESPACE;
1398     }
1399   else
1400     fs = NULL;
1401 
1402   cur_node = dfa->init_node;
1403   re_node_set_init_empty (&eps_via_nodes);
1404 
1405   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1406     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1407   else
1408     {
1409       prev_idx_match = re_malloc (regmatch_t, nmatch);
1410       if (prev_idx_match == NULL)
1411 	{
1412 	  free_fail_stack_return (fs);
1413 	  return REG_ESPACE;
1414 	}
1415       prev_idx_match_malloced = true;
1416     }
1417   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1418 
1419   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1420     {
1421       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1422 
1423       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1424 	{
1425 	  Idx reg_idx;
1426 	  if (fs)
1427 	    {
1428 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1429 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1430 		  break;
1431 	      if (reg_idx == nmatch)
1432 		{
1433 		  re_node_set_free (&eps_via_nodes);
1434 		  if (prev_idx_match_malloced)
1435 		    re_free (prev_idx_match);
1436 		  return free_fail_stack_return (fs);
1437 		}
1438 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1439 					 &eps_via_nodes);
1440 	    }
1441 	  else
1442 	    {
1443 	      re_node_set_free (&eps_via_nodes);
1444 	      if (prev_idx_match_malloced)
1445 		re_free (prev_idx_match);
1446 	      return REG_NOERROR;
1447 	    }
1448 	}
1449 
1450       /* Proceed to next node.  */
1451       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1452 				    &eps_via_nodes, fs);
1453 
1454       if (BE (cur_node < 0, 0))
1455 	{
1456 	  if (BE (cur_node == -2, 0))
1457 	    {
1458 	      re_node_set_free (&eps_via_nodes);
1459 	      if (prev_idx_match_malloced)
1460 		re_free (prev_idx_match);
1461 	      free_fail_stack_return (fs);
1462 	      return REG_ESPACE;
1463 	    }
1464 	  if (fs)
1465 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1466 				       &eps_via_nodes);
1467 	  else
1468 	    {
1469 	      re_node_set_free (&eps_via_nodes);
1470 	      if (prev_idx_match_malloced)
1471 		re_free (prev_idx_match);
1472 	      return REG_NOMATCH;
1473 	    }
1474 	}
1475     }
1476   re_node_set_free (&eps_via_nodes);
1477   if (prev_idx_match_malloced)
1478     re_free (prev_idx_match);
1479   return free_fail_stack_return (fs);
1480 }
1481 
1482 static reg_errcode_t
1483 free_fail_stack_return (struct re_fail_stack_t *fs)
1484 {
1485   if (fs)
1486     {
1487       Idx fs_idx;
1488       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1489 	{
1490 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1491 	  re_free (fs->stack[fs_idx].regs);
1492 	}
1493       re_free (fs->stack);
1494     }
1495   return REG_NOERROR;
1496 }
1497 
1498 static void
1499 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1500 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1501 {
1502   int type = dfa->nodes[cur_node].type;
1503   if (type == OP_OPEN_SUBEXP)
1504     {
1505       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1506 
1507       /* We are at the first node of this sub expression.  */
1508       if (reg_num < nmatch)
1509 	{
1510 	  pmatch[reg_num].rm_so = cur_idx;
1511 	  pmatch[reg_num].rm_eo = -1;
1512 	}
1513     }
1514   else if (type == OP_CLOSE_SUBEXP)
1515     {
1516       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1517       if (reg_num < nmatch)
1518 	{
1519 	  /* We are at the last node of this sub expression.  */
1520 	  if (pmatch[reg_num].rm_so < cur_idx)
1521 	    {
1522 	      pmatch[reg_num].rm_eo = cur_idx;
1523 	      /* This is a non-empty match or we are not inside an optional
1524 		 subexpression.  Accept this right away.  */
1525 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1526 	    }
1527 	  else
1528 	    {
1529 	      if (dfa->nodes[cur_node].opt_subexp
1530 		  && prev_idx_match[reg_num].rm_so != -1)
1531 		/* We transited through an empty match for an optional
1532 		   subexpression, like (a?)*, and this is not the subexp's
1533 		   first match.  Copy back the old content of the registers
1534 		   so that matches of an inner subexpression are undone as
1535 		   well, like in ((a?))*.  */
1536 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1537 	      else
1538 		/* We completed a subexpression, but it may be part of
1539 		   an optional one, so do not update PREV_IDX_MATCH.  */
1540 		pmatch[reg_num].rm_eo = cur_idx;
1541 	    }
1542 	}
1543     }
1544 }
1545 
1546 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1547    and sift the nodes in each states according to the following rules.
1548    Updated state_log will be wrote to STATE_LOG.
1549 
1550    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1551      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1552 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1553 	the LAST_NODE, we throw away the node 'a'.
1554      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1555 	string 's' and transit to 'b':
1556 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1557 	   away the node 'a'.
1558 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1559 	    thrown away, we throw away the node 'a'.
1560      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1561 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1562 	   node 'a'.
1563 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1564 	    we throw away the node 'a'.  */
1565 
1566 #define STATE_NODE_CONTAINS(state,node) \
1567   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1568 
1569 static reg_errcode_t
1570 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1571 {
1572   reg_errcode_t err;
1573   int null_cnt = 0;
1574   Idx str_idx = sctx->last_str_idx;
1575   re_node_set cur_dest;
1576 
1577 #ifdef DEBUG
1578   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1579 #endif
1580 
1581   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1582      transit to the last_node and the last_node itself.  */
1583   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1584   if (BE (err != REG_NOERROR, 0))
1585     return err;
1586   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1587   if (BE (err != REG_NOERROR, 0))
1588     goto free_return;
1589 
1590   /* Then check each states in the state_log.  */
1591   while (str_idx > 0)
1592     {
1593       /* Update counters.  */
1594       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1595       if (null_cnt > mctx->max_mb_elem_len)
1596 	{
1597 	  memset (sctx->sifted_states, '\0',
1598 		  sizeof (re_dfastate_t *) * str_idx);
1599 	  re_node_set_free (&cur_dest);
1600 	  return REG_NOERROR;
1601 	}
1602       re_node_set_empty (&cur_dest);
1603       --str_idx;
1604 
1605       if (mctx->state_log[str_idx])
1606 	{
1607 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1608 	  if (BE (err != REG_NOERROR, 0))
1609 	    goto free_return;
1610 	}
1611 
1612       /* Add all the nodes which satisfy the following conditions:
1613 	 - It can epsilon transit to a node in CUR_DEST.
1614 	 - It is in CUR_SRC.
1615 	 And update state_log.  */
1616       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1617       if (BE (err != REG_NOERROR, 0))
1618 	goto free_return;
1619     }
1620   err = REG_NOERROR;
1621  free_return:
1622   re_node_set_free (&cur_dest);
1623   return err;
1624 }
1625 
1626 static reg_errcode_t
1627 __attribute_warn_unused_result__
1628 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1629 		     Idx str_idx, re_node_set *cur_dest)
1630 {
1631   const re_dfa_t *const dfa = mctx->dfa;
1632   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1633   Idx i;
1634 
1635   /* Then build the next sifted state.
1636      We build the next sifted state on 'cur_dest', and update
1637      'sifted_states[str_idx]' with 'cur_dest'.
1638      Note:
1639      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1640      'cur_src' points the node_set of the old 'state_log[str_idx]'
1641      (with the epsilon nodes pre-filtered out).  */
1642   for (i = 0; i < cur_src->nelem; i++)
1643     {
1644       Idx prev_node = cur_src->elems[i];
1645       int naccepted = 0;
1646       bool ok;
1647 
1648 #ifdef DEBUG
1649       re_token_type_t type = dfa->nodes[prev_node].type;
1650       assert (!IS_EPSILON_NODE (type));
1651 #endif
1652 #ifdef RE_ENABLE_I18N
1653       /* If the node may accept "multi byte".  */
1654       if (dfa->nodes[prev_node].accept_mb)
1655 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1656 					 str_idx, sctx->last_str_idx);
1657 #endif /* RE_ENABLE_I18N */
1658 
1659       /* We don't check backreferences here.
1660 	 See update_cur_sifted_state().  */
1661       if (!naccepted
1662 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1663 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1664 				  dfa->nexts[prev_node]))
1665 	naccepted = 1;
1666 
1667       if (naccepted == 0)
1668 	continue;
1669 
1670       if (sctx->limits.nelem)
1671 	{
1672 	  Idx to_idx = str_idx + naccepted;
1673 	  if (check_dst_limits (mctx, &sctx->limits,
1674 				dfa->nexts[prev_node], to_idx,
1675 				prev_node, str_idx))
1676 	    continue;
1677 	}
1678       ok = re_node_set_insert (cur_dest, prev_node);
1679       if (BE (! ok, 0))
1680 	return REG_ESPACE;
1681     }
1682 
1683   return REG_NOERROR;
1684 }
1685 
1686 /* Helper functions.  */
1687 
1688 static reg_errcode_t
1689 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1690 {
1691   Idx top = mctx->state_log_top;
1692 
1693   if ((next_state_log_idx >= mctx->input.bufs_len
1694        && mctx->input.bufs_len < mctx->input.len)
1695       || (next_state_log_idx >= mctx->input.valid_len
1696 	  && mctx->input.valid_len < mctx->input.len))
1697     {
1698       reg_errcode_t err;
1699       err = extend_buffers (mctx, next_state_log_idx + 1);
1700       if (BE (err != REG_NOERROR, 0))
1701 	return err;
1702     }
1703 
1704   if (top < next_state_log_idx)
1705     {
1706       memset (mctx->state_log + top + 1, '\0',
1707 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1708       mctx->state_log_top = next_state_log_idx;
1709     }
1710   return REG_NOERROR;
1711 }
1712 
1713 static reg_errcode_t
1714 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1715 		   re_dfastate_t **src, Idx num)
1716 {
1717   Idx st_idx;
1718   reg_errcode_t err;
1719   for (st_idx = 0; st_idx < num; ++st_idx)
1720     {
1721       if (dst[st_idx] == NULL)
1722 	dst[st_idx] = src[st_idx];
1723       else if (src[st_idx] != NULL)
1724 	{
1725 	  re_node_set merged_set;
1726 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1727 					&src[st_idx]->nodes);
1728 	  if (BE (err != REG_NOERROR, 0))
1729 	    return err;
1730 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1731 	  re_node_set_free (&merged_set);
1732 	  if (BE (err != REG_NOERROR, 0))
1733 	    return err;
1734 	}
1735     }
1736   return REG_NOERROR;
1737 }
1738 
1739 static reg_errcode_t
1740 update_cur_sifted_state (const re_match_context_t *mctx,
1741 			 re_sift_context_t *sctx, Idx str_idx,
1742 			 re_node_set *dest_nodes)
1743 {
1744   const re_dfa_t *const dfa = mctx->dfa;
1745   reg_errcode_t err = REG_NOERROR;
1746   const re_node_set *candidates;
1747   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1748 		: &mctx->state_log[str_idx]->nodes);
1749 
1750   if (dest_nodes->nelem == 0)
1751     sctx->sifted_states[str_idx] = NULL;
1752   else
1753     {
1754       if (candidates)
1755 	{
1756 	  /* At first, add the nodes which can epsilon transit to a node in
1757 	     DEST_NODE.  */
1758 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1759 	  if (BE (err != REG_NOERROR, 0))
1760 	    return err;
1761 
1762 	  /* Then, check the limitations in the current sift_context.  */
1763 	  if (sctx->limits.nelem)
1764 	    {
1765 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1766 					 mctx->bkref_ents, str_idx);
1767 	      if (BE (err != REG_NOERROR, 0))
1768 		return err;
1769 	    }
1770 	}
1771 
1772       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1773       if (BE (err != REG_NOERROR, 0))
1774 	return err;
1775     }
1776 
1777   if (candidates && mctx->state_log[str_idx]->has_backref)
1778     {
1779       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1780       if (BE (err != REG_NOERROR, 0))
1781 	return err;
1782     }
1783   return REG_NOERROR;
1784 }
1785 
1786 static reg_errcode_t
1787 __attribute_warn_unused_result__
1788 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1789 		       const re_node_set *candidates)
1790 {
1791   reg_errcode_t err = REG_NOERROR;
1792   Idx i;
1793 
1794   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1795   if (BE (err != REG_NOERROR, 0))
1796     return err;
1797 
1798   if (!state->inveclosure.alloc)
1799     {
1800       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1801       if (BE (err != REG_NOERROR, 0))
1802 	return REG_ESPACE;
1803       for (i = 0; i < dest_nodes->nelem; i++)
1804 	{
1805 	  err = re_node_set_merge (&state->inveclosure,
1806 				   dfa->inveclosures + dest_nodes->elems[i]);
1807 	  if (BE (err != REG_NOERROR, 0))
1808 	    return REG_ESPACE;
1809 	}
1810     }
1811   return re_node_set_add_intersect (dest_nodes, candidates,
1812 				    &state->inveclosure);
1813 }
1814 
1815 static reg_errcode_t
1816 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1817 		       const re_node_set *candidates)
1818 {
1819     Idx ecl_idx;
1820     reg_errcode_t err;
1821     re_node_set *inv_eclosure = dfa->inveclosures + node;
1822     re_node_set except_nodes;
1823     re_node_set_init_empty (&except_nodes);
1824     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1825       {
1826 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1827 	if (cur_node == node)
1828 	  continue;
1829 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1830 	  {
1831 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1832 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1833 			 ? dfa->edests[cur_node].elems[1] : -1);
1834 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1835 		 && re_node_set_contains (dest_nodes, edst1))
1836 		|| (edst2 > 0
1837 		    && !re_node_set_contains (inv_eclosure, edst2)
1838 		    && re_node_set_contains (dest_nodes, edst2)))
1839 	      {
1840 		err = re_node_set_add_intersect (&except_nodes, candidates,
1841 						 dfa->inveclosures + cur_node);
1842 		if (BE (err != REG_NOERROR, 0))
1843 		  {
1844 		    re_node_set_free (&except_nodes);
1845 		    return err;
1846 		  }
1847 	      }
1848 	  }
1849       }
1850     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1851       {
1852 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1853 	if (!re_node_set_contains (&except_nodes, cur_node))
1854 	  {
1855 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1856 	    re_node_set_remove_at (dest_nodes, idx);
1857 	  }
1858       }
1859     re_node_set_free (&except_nodes);
1860     return REG_NOERROR;
1861 }
1862 
1863 static bool
1864 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1865 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1866 {
1867   const re_dfa_t *const dfa = mctx->dfa;
1868   Idx lim_idx, src_pos, dst_pos;
1869 
1870   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1871   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1872   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1873     {
1874       Idx subexp_idx;
1875       struct re_backref_cache_entry *ent;
1876       ent = mctx->bkref_ents + limits->elems[lim_idx];
1877       subexp_idx = dfa->nodes[ent->node].opr.idx;
1878 
1879       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1880 					   subexp_idx, dst_node, dst_idx,
1881 					   dst_bkref_idx);
1882       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1883 					   subexp_idx, src_node, src_idx,
1884 					   src_bkref_idx);
1885 
1886       /* In case of:
1887 	 <src> <dst> ( <subexp> )
1888 	 ( <subexp> ) <src> <dst>
1889 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1890       if (src_pos == dst_pos)
1891 	continue; /* This is unrelated limitation.  */
1892       else
1893 	return true;
1894     }
1895   return false;
1896 }
1897 
1898 static int
1899 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1900 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1901 {
1902   const re_dfa_t *const dfa = mctx->dfa;
1903   const re_node_set *eclosures = dfa->eclosures + from_node;
1904   Idx node_idx;
1905 
1906   /* Else, we are on the boundary: examine the nodes on the epsilon
1907      closure.  */
1908   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1909     {
1910       Idx node = eclosures->elems[node_idx];
1911       switch (dfa->nodes[node].type)
1912 	{
1913 	case OP_BACK_REF:
1914 	  if (bkref_idx != -1)
1915 	    {
1916 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1917 	      do
1918 		{
1919 		  Idx dst;
1920 		  int cpos;
1921 
1922 		  if (ent->node != node)
1923 		    continue;
1924 
1925 		  if (subexp_idx < BITSET_WORD_BITS
1926 		      && !(ent->eps_reachable_subexps_map
1927 			   & ((bitset_word_t) 1 << subexp_idx)))
1928 		    continue;
1929 
1930 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1931 		     OP_CLOSE_SUBEXP cases below.  But, if the
1932 		     destination node is the same node as the source
1933 		     node, don't recurse because it would cause an
1934 		     infinite loop: a regex that exhibits this behavior
1935 		     is ()\1*\1*  */
1936 		  dst = dfa->edests[node].elems[0];
1937 		  if (dst == from_node)
1938 		    {
1939 		      if (boundaries & 1)
1940 			return -1;
1941 		      else /* if (boundaries & 2) */
1942 			return 0;
1943 		    }
1944 
1945 		  cpos =
1946 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1947 						 dst, bkref_idx);
1948 		  if (cpos == -1 /* && (boundaries & 1) */)
1949 		    return -1;
1950 		  if (cpos == 0 && (boundaries & 2))
1951 		    return 0;
1952 
1953 		  if (subexp_idx < BITSET_WORD_BITS)
1954 		    ent->eps_reachable_subexps_map
1955 		      &= ~((bitset_word_t) 1 << subexp_idx);
1956 		}
1957 	      while (ent++->more);
1958 	    }
1959 	  break;
1960 
1961 	case OP_OPEN_SUBEXP:
1962 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1963 	    return -1;
1964 	  break;
1965 
1966 	case OP_CLOSE_SUBEXP:
1967 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1968 	    return 0;
1969 	  break;
1970 
1971 	default:
1972 	    break;
1973 	}
1974     }
1975 
1976   return (boundaries & 2) ? 1 : 0;
1977 }
1978 
1979 static int
1980 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1981 			   Idx subexp_idx, Idx from_node, Idx str_idx,
1982 			   Idx bkref_idx)
1983 {
1984   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1985   int boundaries;
1986 
1987   /* If we are outside the range of the subexpression, return -1 or 1.  */
1988   if (str_idx < lim->subexp_from)
1989     return -1;
1990 
1991   if (lim->subexp_to < str_idx)
1992     return 1;
1993 
1994   /* If we are within the subexpression, return 0.  */
1995   boundaries = (str_idx == lim->subexp_from);
1996   boundaries |= (str_idx == lim->subexp_to) << 1;
1997   if (boundaries == 0)
1998     return 0;
1999 
2000   /* Else, examine epsilon closure.  */
2001   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2002 				      from_node, bkref_idx);
2003 }
2004 
2005 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2006    which are against limitations from DEST_NODES. */
2007 
2008 static reg_errcode_t
2009 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2010 		     const re_node_set *candidates, re_node_set *limits,
2011 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2012 {
2013   reg_errcode_t err;
2014   Idx node_idx, lim_idx;
2015 
2016   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2017     {
2018       Idx subexp_idx;
2019       struct re_backref_cache_entry *ent;
2020       ent = bkref_ents + limits->elems[lim_idx];
2021 
2022       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2023 	continue; /* This is unrelated limitation.  */
2024 
2025       subexp_idx = dfa->nodes[ent->node].opr.idx;
2026       if (ent->subexp_to == str_idx)
2027 	{
2028 	  Idx ops_node = -1;
2029 	  Idx cls_node = -1;
2030 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2031 	    {
2032 	      Idx node = dest_nodes->elems[node_idx];
2033 	      re_token_type_t type = dfa->nodes[node].type;
2034 	      if (type == OP_OPEN_SUBEXP
2035 		  && subexp_idx == dfa->nodes[node].opr.idx)
2036 		ops_node = node;
2037 	      else if (type == OP_CLOSE_SUBEXP
2038 		       && subexp_idx == dfa->nodes[node].opr.idx)
2039 		cls_node = node;
2040 	    }
2041 
2042 	  /* Check the limitation of the open subexpression.  */
2043 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2044 	  if (ops_node >= 0)
2045 	    {
2046 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2047 					   candidates);
2048 	      if (BE (err != REG_NOERROR, 0))
2049 		return err;
2050 	    }
2051 
2052 	  /* Check the limitation of the close subexpression.  */
2053 	  if (cls_node >= 0)
2054 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2055 	      {
2056 		Idx node = dest_nodes->elems[node_idx];
2057 		if (!re_node_set_contains (dfa->inveclosures + node,
2058 					   cls_node)
2059 		    && !re_node_set_contains (dfa->eclosures + node,
2060 					      cls_node))
2061 		  {
2062 		    /* It is against this limitation.
2063 		       Remove it form the current sifted state.  */
2064 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2065 						 candidates);
2066 		    if (BE (err != REG_NOERROR, 0))
2067 		      return err;
2068 		    --node_idx;
2069 		  }
2070 	      }
2071 	}
2072       else /* (ent->subexp_to != str_idx)  */
2073 	{
2074 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2075 	    {
2076 	      Idx node = dest_nodes->elems[node_idx];
2077 	      re_token_type_t type = dfa->nodes[node].type;
2078 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2079 		{
2080 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2081 		    continue;
2082 		  /* It is against this limitation.
2083 		     Remove it form the current sifted state.  */
2084 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2085 					       candidates);
2086 		  if (BE (err != REG_NOERROR, 0))
2087 		    return err;
2088 		}
2089 	    }
2090 	}
2091     }
2092   return REG_NOERROR;
2093 }
2094 
2095 static reg_errcode_t
2096 __attribute_warn_unused_result__
2097 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2098 		   Idx str_idx, const re_node_set *candidates)
2099 {
2100   const re_dfa_t *const dfa = mctx->dfa;
2101   reg_errcode_t err;
2102   Idx node_idx, node;
2103   re_sift_context_t local_sctx;
2104   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2105 
2106   if (first_idx == -1)
2107     return REG_NOERROR;
2108 
2109   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2110 
2111   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2112     {
2113       Idx enabled_idx;
2114       re_token_type_t type;
2115       struct re_backref_cache_entry *entry;
2116       node = candidates->elems[node_idx];
2117       type = dfa->nodes[node].type;
2118       /* Avoid infinite loop for the REs like "()\1+".  */
2119       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2120 	continue;
2121       if (type != OP_BACK_REF)
2122 	continue;
2123 
2124       entry = mctx->bkref_ents + first_idx;
2125       enabled_idx = first_idx;
2126       do
2127 	{
2128 	  Idx subexp_len;
2129 	  Idx to_idx;
2130 	  Idx dst_node;
2131 	  bool ok;
2132 	  re_dfastate_t *cur_state;
2133 
2134 	  if (entry->node != node)
2135 	    continue;
2136 	  subexp_len = entry->subexp_to - entry->subexp_from;
2137 	  to_idx = str_idx + subexp_len;
2138 	  dst_node = (subexp_len ? dfa->nexts[node]
2139 		      : dfa->edests[node].elems[0]);
2140 
2141 	  if (to_idx > sctx->last_str_idx
2142 	      || sctx->sifted_states[to_idx] == NULL
2143 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2144 	      || check_dst_limits (mctx, &sctx->limits, node,
2145 				   str_idx, dst_node, to_idx))
2146 	    continue;
2147 
2148 	  if (local_sctx.sifted_states == NULL)
2149 	    {
2150 	      local_sctx = *sctx;
2151 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2152 	      if (BE (err != REG_NOERROR, 0))
2153 		goto free_return;
2154 	    }
2155 	  local_sctx.last_node = node;
2156 	  local_sctx.last_str_idx = str_idx;
2157 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2158 	  if (BE (! ok, 0))
2159 	    {
2160 	      err = REG_ESPACE;
2161 	      goto free_return;
2162 	    }
2163 	  cur_state = local_sctx.sifted_states[str_idx];
2164 	  err = sift_states_backward (mctx, &local_sctx);
2165 	  if (BE (err != REG_NOERROR, 0))
2166 	    goto free_return;
2167 	  if (sctx->limited_states != NULL)
2168 	    {
2169 	      err = merge_state_array (dfa, sctx->limited_states,
2170 				       local_sctx.sifted_states,
2171 				       str_idx + 1);
2172 	      if (BE (err != REG_NOERROR, 0))
2173 		goto free_return;
2174 	    }
2175 	  local_sctx.sifted_states[str_idx] = cur_state;
2176 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2177 
2178 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2179 	  entry = mctx->bkref_ents + enabled_idx;
2180 	}
2181       while (enabled_idx++, entry++->more);
2182     }
2183   err = REG_NOERROR;
2184  free_return:
2185   if (local_sctx.sifted_states != NULL)
2186     {
2187       re_node_set_free (&local_sctx.limits);
2188     }
2189 
2190   return err;
2191 }
2192 
2193 
2194 #ifdef RE_ENABLE_I18N
2195 static int
2196 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2197 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2198 {
2199   const re_dfa_t *const dfa = mctx->dfa;
2200   int naccepted;
2201   /* Check the node can accept "multi byte".  */
2202   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2203   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2204       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2205 			    dfa->nexts[node_idx]))
2206     /* The node can't accept the "multi byte", or the
2207        destination was already thrown away, then the node
2208        could't accept the current input "multi byte".   */
2209     naccepted = 0;
2210   /* Otherwise, it is sure that the node could accept
2211      'naccepted' bytes input.  */
2212   return naccepted;
2213 }
2214 #endif /* RE_ENABLE_I18N */
2215 
2216 
2217 /* Functions for state transition.  */
2218 
2219 /* Return the next state to which the current state STATE will transit by
2220    accepting the current input byte, and update STATE_LOG if necessary.
2221    If STATE can accept a multibyte char/collating element/back reference
2222    update the destination of STATE_LOG.  */
2223 
2224 static re_dfastate_t *
2225 __attribute_warn_unused_result__
2226 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2227 	       re_dfastate_t *state)
2228 {
2229   re_dfastate_t **trtable;
2230   unsigned char ch;
2231 
2232 #ifdef RE_ENABLE_I18N
2233   /* If the current state can accept multibyte.  */
2234   if (BE (state->accept_mb, 0))
2235     {
2236       *err = transit_state_mb (mctx, state);
2237       if (BE (*err != REG_NOERROR, 0))
2238 	return NULL;
2239     }
2240 #endif /* RE_ENABLE_I18N */
2241 
2242   /* Then decide the next state with the single byte.  */
2243 #if 0
2244   if (0)
2245     /* don't use transition table  */
2246     return transit_state_sb (err, mctx, state);
2247 #endif
2248 
2249   /* Use transition table  */
2250   ch = re_string_fetch_byte (&mctx->input);
2251   for (;;)
2252     {
2253       trtable = state->trtable;
2254       if (BE (trtable != NULL, 1))
2255 	return trtable[ch];
2256 
2257       trtable = state->word_trtable;
2258       if (BE (trtable != NULL, 1))
2259 	{
2260 	  unsigned int context;
2261 	  context
2262 	    = re_string_context_at (&mctx->input,
2263 				    re_string_cur_idx (&mctx->input) - 1,
2264 				    mctx->eflags);
2265 	  if (IS_WORD_CONTEXT (context))
2266 	    return trtable[ch + SBC_MAX];
2267 	  else
2268 	    return trtable[ch];
2269 	}
2270 
2271       if (!build_trtable (mctx->dfa, state))
2272 	{
2273 	  *err = REG_ESPACE;
2274 	  return NULL;
2275 	}
2276 
2277       /* Retry, we now have a transition table.  */
2278     }
2279 }
2280 
2281 /* Update the state_log if we need */
2282 static re_dfastate_t *
2283 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2284 		      re_dfastate_t *next_state)
2285 {
2286   const re_dfa_t *const dfa = mctx->dfa;
2287   Idx cur_idx = re_string_cur_idx (&mctx->input);
2288 
2289   if (cur_idx > mctx->state_log_top)
2290     {
2291       mctx->state_log[cur_idx] = next_state;
2292       mctx->state_log_top = cur_idx;
2293     }
2294   else if (mctx->state_log[cur_idx] == 0)
2295     {
2296       mctx->state_log[cur_idx] = next_state;
2297     }
2298   else
2299     {
2300       re_dfastate_t *pstate;
2301       unsigned int context;
2302       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2303       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2304 	 the destination of a multibyte char/collating element/
2305 	 back reference.  Then the next state is the union set of
2306 	 these destinations and the results of the transition table.  */
2307       pstate = mctx->state_log[cur_idx];
2308       log_nodes = pstate->entrance_nodes;
2309       if (next_state != NULL)
2310 	{
2311 	  table_nodes = next_state->entrance_nodes;
2312 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2313 					     log_nodes);
2314 	  if (BE (*err != REG_NOERROR, 0))
2315 	    return NULL;
2316 	}
2317       else
2318 	next_nodes = *log_nodes;
2319       /* Note: We already add the nodes of the initial state,
2320 	 then we don't need to add them here.  */
2321 
2322       context = re_string_context_at (&mctx->input,
2323 				      re_string_cur_idx (&mctx->input) - 1,
2324 				      mctx->eflags);
2325       next_state = mctx->state_log[cur_idx]
2326 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2327       /* We don't need to check errors here, since the return value of
2328 	 this function is next_state and ERR is already set.  */
2329 
2330       if (table_nodes != NULL)
2331 	re_node_set_free (&next_nodes);
2332     }
2333 
2334   if (BE (dfa->nbackref, 0) && next_state != NULL)
2335     {
2336       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2337 	 later.  We must check them here, since the back references in the
2338 	 next state might use them.  */
2339       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2340 					cur_idx);
2341       if (BE (*err != REG_NOERROR, 0))
2342 	return NULL;
2343 
2344       /* If the next state has back references.  */
2345       if (next_state->has_backref)
2346 	{
2347 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2348 	  if (BE (*err != REG_NOERROR, 0))
2349 	    return NULL;
2350 	  next_state = mctx->state_log[cur_idx];
2351 	}
2352     }
2353 
2354   return next_state;
2355 }
2356 
2357 /* Skip bytes in the input that correspond to part of a
2358    multi-byte match, then look in the log for a state
2359    from which to restart matching.  */
2360 static re_dfastate_t *
2361 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2362 {
2363   re_dfastate_t *cur_state;
2364   do
2365     {
2366       Idx max = mctx->state_log_top;
2367       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2368 
2369       do
2370 	{
2371 	  if (++cur_str_idx > max)
2372 	    return NULL;
2373 	  re_string_skip_bytes (&mctx->input, 1);
2374 	}
2375       while (mctx->state_log[cur_str_idx] == NULL);
2376 
2377       cur_state = merge_state_with_log (err, mctx, NULL);
2378     }
2379   while (*err == REG_NOERROR && cur_state == NULL);
2380   return cur_state;
2381 }
2382 
2383 /* Helper functions for transit_state.  */
2384 
2385 /* From the node set CUR_NODES, pick up the nodes whose types are
2386    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2387    expression. And register them to use them later for evaluating the
2388    corresponding back references.  */
2389 
2390 static reg_errcode_t
2391 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2392 			   Idx str_idx)
2393 {
2394   const re_dfa_t *const dfa = mctx->dfa;
2395   Idx node_idx;
2396   reg_errcode_t err;
2397 
2398   /* TODO: This isn't efficient.
2399 	   Because there might be more than one nodes whose types are
2400 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2401 	   nodes.
2402 	   E.g. RE: (a){2}  */
2403   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2404     {
2405       Idx node = cur_nodes->elems[node_idx];
2406       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2407 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2408 	  && (dfa->used_bkref_map
2409 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2410 	{
2411 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2412 	  if (BE (err != REG_NOERROR, 0))
2413 	    return err;
2414 	}
2415     }
2416   return REG_NOERROR;
2417 }
2418 
2419 #if 0
2420 /* Return the next state to which the current state STATE will transit by
2421    accepting the current input byte.  */
2422 
2423 static re_dfastate_t *
2424 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2425 		  re_dfastate_t *state)
2426 {
2427   const re_dfa_t *const dfa = mctx->dfa;
2428   re_node_set next_nodes;
2429   re_dfastate_t *next_state;
2430   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2431   unsigned int context;
2432 
2433   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2434   if (BE (*err != REG_NOERROR, 0))
2435     return NULL;
2436   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2437     {
2438       Idx cur_node = state->nodes.elems[node_cnt];
2439       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2440 	{
2441 	  *err = re_node_set_merge (&next_nodes,
2442 				    dfa->eclosures + dfa->nexts[cur_node]);
2443 	  if (BE (*err != REG_NOERROR, 0))
2444 	    {
2445 	      re_node_set_free (&next_nodes);
2446 	      return NULL;
2447 	    }
2448 	}
2449     }
2450   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2451   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2452   /* We don't need to check errors here, since the return value of
2453      this function is next_state and ERR is already set.  */
2454 
2455   re_node_set_free (&next_nodes);
2456   re_string_skip_bytes (&mctx->input, 1);
2457   return next_state;
2458 }
2459 #endif
2460 
2461 #ifdef RE_ENABLE_I18N
2462 static reg_errcode_t
2463 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2464 {
2465   const re_dfa_t *const dfa = mctx->dfa;
2466   reg_errcode_t err;
2467   Idx i;
2468 
2469   for (i = 0; i < pstate->nodes.nelem; ++i)
2470     {
2471       re_node_set dest_nodes, *new_nodes;
2472       Idx cur_node_idx = pstate->nodes.elems[i];
2473       int naccepted;
2474       Idx dest_idx;
2475       unsigned int context;
2476       re_dfastate_t *dest_state;
2477 
2478       if (!dfa->nodes[cur_node_idx].accept_mb)
2479 	continue;
2480 
2481       if (dfa->nodes[cur_node_idx].constraint)
2482 	{
2483 	  context = re_string_context_at (&mctx->input,
2484 					  re_string_cur_idx (&mctx->input),
2485 					  mctx->eflags);
2486 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2487 					   context))
2488 	    continue;
2489 	}
2490 
2491       /* How many bytes the node can accept?  */
2492       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2493 					   re_string_cur_idx (&mctx->input));
2494       if (naccepted == 0)
2495 	continue;
2496 
2497       /* The node can accepts 'naccepted' bytes.  */
2498       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2499       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2500 			       : mctx->max_mb_elem_len);
2501       err = clean_state_log_if_needed (mctx, dest_idx);
2502       if (BE (err != REG_NOERROR, 0))
2503 	return err;
2504 #ifdef DEBUG
2505       assert (dfa->nexts[cur_node_idx] != -1);
2506 #endif
2507       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2508 
2509       dest_state = mctx->state_log[dest_idx];
2510       if (dest_state == NULL)
2511 	dest_nodes = *new_nodes;
2512       else
2513 	{
2514 	  err = re_node_set_init_union (&dest_nodes,
2515 					dest_state->entrance_nodes, new_nodes);
2516 	  if (BE (err != REG_NOERROR, 0))
2517 	    return err;
2518 	}
2519       context = re_string_context_at (&mctx->input, dest_idx - 1,
2520 				      mctx->eflags);
2521       mctx->state_log[dest_idx]
2522 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2523       if (dest_state != NULL)
2524 	re_node_set_free (&dest_nodes);
2525       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2526 	return err;
2527     }
2528   return REG_NOERROR;
2529 }
2530 #endif /* RE_ENABLE_I18N */
2531 
2532 static reg_errcode_t
2533 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2534 {
2535   const re_dfa_t *const dfa = mctx->dfa;
2536   reg_errcode_t err;
2537   Idx i;
2538   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2539 
2540   for (i = 0; i < nodes->nelem; ++i)
2541     {
2542       Idx dest_str_idx, prev_nelem, bkc_idx;
2543       Idx node_idx = nodes->elems[i];
2544       unsigned int context;
2545       const re_token_t *node = dfa->nodes + node_idx;
2546       re_node_set *new_dest_nodes;
2547 
2548       /* Check whether 'node' is a backreference or not.  */
2549       if (node->type != OP_BACK_REF)
2550 	continue;
2551 
2552       if (node->constraint)
2553 	{
2554 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2555 					  mctx->eflags);
2556 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2557 	    continue;
2558 	}
2559 
2560       /* 'node' is a backreference.
2561 	 Check the substring which the substring matched.  */
2562       bkc_idx = mctx->nbkref_ents;
2563       err = get_subexp (mctx, node_idx, cur_str_idx);
2564       if (BE (err != REG_NOERROR, 0))
2565 	goto free_return;
2566 
2567       /* And add the epsilon closures (which is 'new_dest_nodes') of
2568 	 the backreference to appropriate state_log.  */
2569 #ifdef DEBUG
2570       assert (dfa->nexts[node_idx] != -1);
2571 #endif
2572       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2573 	{
2574 	  Idx subexp_len;
2575 	  re_dfastate_t *dest_state;
2576 	  struct re_backref_cache_entry *bkref_ent;
2577 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2578 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2579 	    continue;
2580 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2581 	  new_dest_nodes = (subexp_len == 0
2582 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2583 			    : dfa->eclosures + dfa->nexts[node_idx]);
2584 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2585 			  - bkref_ent->subexp_from);
2586 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2587 					  mctx->eflags);
2588 	  dest_state = mctx->state_log[dest_str_idx];
2589 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2590 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2591 	  /* Add 'new_dest_node' to state_log.  */
2592 	  if (dest_state == NULL)
2593 	    {
2594 	      mctx->state_log[dest_str_idx]
2595 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2596 					    context);
2597 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2598 		      && err != REG_NOERROR, 0))
2599 		goto free_return;
2600 	    }
2601 	  else
2602 	    {
2603 	      re_node_set dest_nodes;
2604 	      err = re_node_set_init_union (&dest_nodes,
2605 					    dest_state->entrance_nodes,
2606 					    new_dest_nodes);
2607 	      if (BE (err != REG_NOERROR, 0))
2608 		{
2609 		  re_node_set_free (&dest_nodes);
2610 		  goto free_return;
2611 		}
2612 	      mctx->state_log[dest_str_idx]
2613 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2614 	      re_node_set_free (&dest_nodes);
2615 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2616 		      && err != REG_NOERROR, 0))
2617 		goto free_return;
2618 	    }
2619 	  /* We need to check recursively if the backreference can epsilon
2620 	     transit.  */
2621 	  if (subexp_len == 0
2622 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2623 	    {
2624 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2625 					       cur_str_idx);
2626 	      if (BE (err != REG_NOERROR, 0))
2627 		goto free_return;
2628 	      err = transit_state_bkref (mctx, new_dest_nodes);
2629 	      if (BE (err != REG_NOERROR, 0))
2630 		goto free_return;
2631 	    }
2632 	}
2633     }
2634   err = REG_NOERROR;
2635  free_return:
2636   return err;
2637 }
2638 
2639 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2640    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2641    Note that we might collect inappropriate candidates here.
2642    However, the cost of checking them strictly here is too high, then we
2643    delay these checking for prune_impossible_nodes().  */
2644 
2645 static reg_errcode_t
2646 __attribute_warn_unused_result__
2647 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2648 {
2649   const re_dfa_t *const dfa = mctx->dfa;
2650   Idx subexp_num, sub_top_idx;
2651   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2652   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2653   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2654   if (cache_idx != -1)
2655     {
2656       const struct re_backref_cache_entry *entry
2657 	= mctx->bkref_ents + cache_idx;
2658       do
2659 	if (entry->node == bkref_node)
2660 	  return REG_NOERROR; /* We already checked it.  */
2661       while (entry++->more);
2662     }
2663 
2664   subexp_num = dfa->nodes[bkref_node].opr.idx;
2665 
2666   /* For each sub expression  */
2667   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2668     {
2669       reg_errcode_t err;
2670       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2671       re_sub_match_last_t *sub_last;
2672       Idx sub_last_idx, sl_str, bkref_str_off;
2673 
2674       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2675 	continue; /* It isn't related.  */
2676 
2677       sl_str = sub_top->str_idx;
2678       bkref_str_off = bkref_str_idx;
2679       /* At first, check the last node of sub expressions we already
2680 	 evaluated.  */
2681       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2682 	{
2683 	  regoff_t sl_str_diff;
2684 	  sub_last = sub_top->lasts[sub_last_idx];
2685 	  sl_str_diff = sub_last->str_idx - sl_str;
2686 	  /* The matched string by the sub expression match with the substring
2687 	     at the back reference?  */
2688 	  if (sl_str_diff > 0)
2689 	    {
2690 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2691 		{
2692 		  /* Not enough chars for a successful match.  */
2693 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2694 		    break;
2695 
2696 		  err = clean_state_log_if_needed (mctx,
2697 						   bkref_str_off
2698 						   + sl_str_diff);
2699 		  if (BE (err != REG_NOERROR, 0))
2700 		    return err;
2701 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2702 		}
2703 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2704 		/* We don't need to search this sub expression any more.  */
2705 		break;
2706 	    }
2707 	  bkref_str_off += sl_str_diff;
2708 	  sl_str += sl_str_diff;
2709 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2710 				bkref_str_idx);
2711 
2712 	  /* Reload buf, since the preceding call might have reallocated
2713 	     the buffer.  */
2714 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2715 
2716 	  if (err == REG_NOMATCH)
2717 	    continue;
2718 	  if (BE (err != REG_NOERROR, 0))
2719 	    return err;
2720 	}
2721 
2722       if (sub_last_idx < sub_top->nlasts)
2723 	continue;
2724       if (sub_last_idx > 0)
2725 	++sl_str;
2726       /* Then, search for the other last nodes of the sub expression.  */
2727       for (; sl_str <= bkref_str_idx; ++sl_str)
2728 	{
2729 	  Idx cls_node;
2730 	  regoff_t sl_str_off;
2731 	  const re_node_set *nodes;
2732 	  sl_str_off = sl_str - sub_top->str_idx;
2733 	  /* The matched string by the sub expression match with the substring
2734 	     at the back reference?  */
2735 	  if (sl_str_off > 0)
2736 	    {
2737 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2738 		{
2739 		  /* If we are at the end of the input, we cannot match.  */
2740 		  if (bkref_str_off >= mctx->input.len)
2741 		    break;
2742 
2743 		  err = extend_buffers (mctx, bkref_str_off + 1);
2744 		  if (BE (err != REG_NOERROR, 0))
2745 		    return err;
2746 
2747 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2748 		}
2749 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2750 		break; /* We don't need to search this sub expression
2751 			  any more.  */
2752 	    }
2753 	  if (mctx->state_log[sl_str] == NULL)
2754 	    continue;
2755 	  /* Does this state have a ')' of the sub expression?  */
2756 	  nodes = &mctx->state_log[sl_str]->nodes;
2757 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2758 				       OP_CLOSE_SUBEXP);
2759 	  if (cls_node == -1)
2760 	    continue; /* No.  */
2761 	  if (sub_top->path == NULL)
2762 	    {
2763 	      sub_top->path = calloc (sizeof (state_array_t),
2764 				      sl_str - sub_top->str_idx + 1);
2765 	      if (sub_top->path == NULL)
2766 		return REG_ESPACE;
2767 	    }
2768 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2769 	     in the current context?  */
2770 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2771 			       sub_top->str_idx, cls_node, sl_str,
2772 			       OP_CLOSE_SUBEXP);
2773 	  if (err == REG_NOMATCH)
2774 	      continue;
2775 	  if (BE (err != REG_NOERROR, 0))
2776 	      return err;
2777 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2778 	  if (BE (sub_last == NULL, 0))
2779 	    return REG_ESPACE;
2780 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2781 				bkref_str_idx);
2782 	  if (err == REG_NOMATCH)
2783 	    continue;
2784 	}
2785     }
2786   return REG_NOERROR;
2787 }
2788 
2789 /* Helper functions for get_subexp().  */
2790 
2791 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2792    If it can arrive, register the sub expression expressed with SUB_TOP
2793    and SUB_LAST.  */
2794 
2795 static reg_errcode_t
2796 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2797 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2798 {
2799   reg_errcode_t err;
2800   Idx to_idx;
2801   /* Can the subexpression arrive the back reference?  */
2802   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2803 		       sub_last->str_idx, bkref_node, bkref_str,
2804 		       OP_OPEN_SUBEXP);
2805   if (err != REG_NOERROR)
2806     return err;
2807   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2808 			     sub_last->str_idx);
2809   if (BE (err != REG_NOERROR, 0))
2810     return err;
2811   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2812   return clean_state_log_if_needed (mctx, to_idx);
2813 }
2814 
2815 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2816    Search '(' if FL_OPEN, or search ')' otherwise.
2817    TODO: This function isn't efficient...
2818 	 Because there might be more than one nodes whose types are
2819 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2820 	 nodes.
2821 	 E.g. RE: (a){2}  */
2822 
2823 static Idx
2824 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2825 		  Idx subexp_idx, int type)
2826 {
2827   Idx cls_idx;
2828   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2829     {
2830       Idx cls_node = nodes->elems[cls_idx];
2831       const re_token_t *node = dfa->nodes + cls_node;
2832       if (node->type == type
2833 	  && node->opr.idx == subexp_idx)
2834 	return cls_node;
2835     }
2836   return -1;
2837 }
2838 
2839 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2840    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2841    heavily reused.
2842    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2843 
2844 static reg_errcode_t
2845 __attribute_warn_unused_result__
2846 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2847 	       Idx top_str, Idx last_node, Idx last_str, int type)
2848 {
2849   const re_dfa_t *const dfa = mctx->dfa;
2850   reg_errcode_t err = REG_NOERROR;
2851   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2852   re_dfastate_t *cur_state = NULL;
2853   re_node_set *cur_nodes, next_nodes;
2854   re_dfastate_t **backup_state_log;
2855   unsigned int context;
2856 
2857   subexp_num = dfa->nodes[top_node].opr.idx;
2858   /* Extend the buffer if we need.  */
2859   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2860     {
2861       re_dfastate_t **new_array;
2862       Idx old_alloc = path->alloc;
2863       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2864       Idx new_alloc;
2865       if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2866 	return REG_ESPACE;
2867       new_alloc = old_alloc + incr_alloc;
2868       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2869 	return REG_ESPACE;
2870       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2871       if (BE (new_array == NULL, 0))
2872 	return REG_ESPACE;
2873       path->array = new_array;
2874       path->alloc = new_alloc;
2875       memset (new_array + old_alloc, '\0',
2876 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2877     }
2878 
2879   str_idx = path->next_idx ? path->next_idx : top_str;
2880 
2881   /* Temporary modify MCTX.  */
2882   backup_state_log = mctx->state_log;
2883   backup_cur_idx = mctx->input.cur_idx;
2884   mctx->state_log = path->array;
2885   mctx->input.cur_idx = str_idx;
2886 
2887   /* Setup initial node set.  */
2888   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2889   if (str_idx == top_str)
2890     {
2891       err = re_node_set_init_1 (&next_nodes, top_node);
2892       if (BE (err != REG_NOERROR, 0))
2893 	return err;
2894       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2895       if (BE (err != REG_NOERROR, 0))
2896 	{
2897 	  re_node_set_free (&next_nodes);
2898 	  return err;
2899 	}
2900     }
2901   else
2902     {
2903       cur_state = mctx->state_log[str_idx];
2904       if (cur_state && cur_state->has_backref)
2905 	{
2906 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2907 	  if (BE (err != REG_NOERROR, 0))
2908 	    return err;
2909 	}
2910       else
2911 	re_node_set_init_empty (&next_nodes);
2912     }
2913   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2914     {
2915       if (next_nodes.nelem)
2916 	{
2917 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2918 				    subexp_num, type);
2919 	  if (BE (err != REG_NOERROR, 0))
2920 	    {
2921 	      re_node_set_free (&next_nodes);
2922 	      return err;
2923 	    }
2924 	}
2925       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2926       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2927 	{
2928 	  re_node_set_free (&next_nodes);
2929 	  return err;
2930 	}
2931       mctx->state_log[str_idx] = cur_state;
2932     }
2933 
2934   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2935     {
2936       re_node_set_empty (&next_nodes);
2937       if (mctx->state_log[str_idx + 1])
2938 	{
2939 	  err = re_node_set_merge (&next_nodes,
2940 				   &mctx->state_log[str_idx + 1]->nodes);
2941 	  if (BE (err != REG_NOERROR, 0))
2942 	    {
2943 	      re_node_set_free (&next_nodes);
2944 	      return err;
2945 	    }
2946 	}
2947       if (cur_state)
2948 	{
2949 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2950 					      &cur_state->non_eps_nodes,
2951 					      &next_nodes);
2952 	  if (BE (err != REG_NOERROR, 0))
2953 	    {
2954 	      re_node_set_free (&next_nodes);
2955 	      return err;
2956 	    }
2957 	}
2958       ++str_idx;
2959       if (next_nodes.nelem)
2960 	{
2961 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2962 	  if (BE (err != REG_NOERROR, 0))
2963 	    {
2964 	      re_node_set_free (&next_nodes);
2965 	      return err;
2966 	    }
2967 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2968 				    subexp_num, type);
2969 	  if (BE (err != REG_NOERROR, 0))
2970 	    {
2971 	      re_node_set_free (&next_nodes);
2972 	      return err;
2973 	    }
2974 	}
2975       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2976       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2977       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2978 	{
2979 	  re_node_set_free (&next_nodes);
2980 	  return err;
2981 	}
2982       mctx->state_log[str_idx] = cur_state;
2983       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2984     }
2985   re_node_set_free (&next_nodes);
2986   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2987 	       : &mctx->state_log[last_str]->nodes);
2988   path->next_idx = str_idx;
2989 
2990   /* Fix MCTX.  */
2991   mctx->state_log = backup_state_log;
2992   mctx->input.cur_idx = backup_cur_idx;
2993 
2994   /* Then check the current node set has the node LAST_NODE.  */
2995   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2996     return REG_NOERROR;
2997 
2998   return REG_NOMATCH;
2999 }
3000 
3001 /* Helper functions for check_arrival.  */
3002 
3003 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3004    to NEXT_NODES.
3005    TODO: This function is similar to the functions transit_state*(),
3006 	 however this function has many additional works.
3007 	 Can't we unify them?  */
3008 
3009 static reg_errcode_t
3010 __attribute_warn_unused_result__
3011 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3012 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3013 {
3014   const re_dfa_t *const dfa = mctx->dfa;
3015   bool ok;
3016   Idx cur_idx;
3017 #ifdef RE_ENABLE_I18N
3018   reg_errcode_t err = REG_NOERROR;
3019 #endif
3020   re_node_set union_set;
3021   re_node_set_init_empty (&union_set);
3022   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3023     {
3024       int naccepted = 0;
3025       Idx cur_node = cur_nodes->elems[cur_idx];
3026 #ifdef DEBUG
3027       re_token_type_t type = dfa->nodes[cur_node].type;
3028       assert (!IS_EPSILON_NODE (type));
3029 #endif
3030 #ifdef RE_ENABLE_I18N
3031       /* If the node may accept "multi byte".  */
3032       if (dfa->nodes[cur_node].accept_mb)
3033 	{
3034 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3035 					       str_idx);
3036 	  if (naccepted > 1)
3037 	    {
3038 	      re_dfastate_t *dest_state;
3039 	      Idx next_node = dfa->nexts[cur_node];
3040 	      Idx next_idx = str_idx + naccepted;
3041 	      dest_state = mctx->state_log[next_idx];
3042 	      re_node_set_empty (&union_set);
3043 	      if (dest_state)
3044 		{
3045 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3046 		  if (BE (err != REG_NOERROR, 0))
3047 		    {
3048 		      re_node_set_free (&union_set);
3049 		      return err;
3050 		    }
3051 		}
3052 	      ok = re_node_set_insert (&union_set, next_node);
3053 	      if (BE (! ok, 0))
3054 		{
3055 		  re_node_set_free (&union_set);
3056 		  return REG_ESPACE;
3057 		}
3058 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3059 							    &union_set);
3060 	      if (BE (mctx->state_log[next_idx] == NULL
3061 		      && err != REG_NOERROR, 0))
3062 		{
3063 		  re_node_set_free (&union_set);
3064 		  return err;
3065 		}
3066 	    }
3067 	}
3068 #endif /* RE_ENABLE_I18N */
3069       if (naccepted
3070 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3071 	{
3072 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3073 	  if (BE (! ok, 0))
3074 	    {
3075 	      re_node_set_free (&union_set);
3076 	      return REG_ESPACE;
3077 	    }
3078 	}
3079     }
3080   re_node_set_free (&union_set);
3081   return REG_NOERROR;
3082 }
3083 
3084 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3085    CUR_NODES, however exclude the nodes which are:
3086     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3087     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3088 */
3089 
3090 static reg_errcode_t
3091 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3092 			  Idx ex_subexp, int type)
3093 {
3094   reg_errcode_t err;
3095   Idx idx, outside_node;
3096   re_node_set new_nodes;
3097 #ifdef DEBUG
3098   assert (cur_nodes->nelem);
3099 #endif
3100   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3101   if (BE (err != REG_NOERROR, 0))
3102     return err;
3103   /* Create a new node set NEW_NODES with the nodes which are epsilon
3104      closures of the node in CUR_NODES.  */
3105 
3106   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3107     {
3108       Idx cur_node = cur_nodes->elems[idx];
3109       const re_node_set *eclosure = dfa->eclosures + cur_node;
3110       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3111       if (outside_node == -1)
3112 	{
3113 	  /* There are no problematic nodes, just merge them.  */
3114 	  err = re_node_set_merge (&new_nodes, eclosure);
3115 	  if (BE (err != REG_NOERROR, 0))
3116 	    {
3117 	      re_node_set_free (&new_nodes);
3118 	      return err;
3119 	    }
3120 	}
3121       else
3122 	{
3123 	  /* There are problematic nodes, re-calculate incrementally.  */
3124 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3125 					      ex_subexp, type);
3126 	  if (BE (err != REG_NOERROR, 0))
3127 	    {
3128 	      re_node_set_free (&new_nodes);
3129 	      return err;
3130 	    }
3131 	}
3132     }
3133   re_node_set_free (cur_nodes);
3134   *cur_nodes = new_nodes;
3135   return REG_NOERROR;
3136 }
3137 
3138 /* Helper function for check_arrival_expand_ecl.
3139    Check incrementally the epsilon closure of TARGET, and if it isn't
3140    problematic append it to DST_NODES.  */
3141 
3142 static reg_errcode_t
3143 __attribute_warn_unused_result__
3144 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3145 			      Idx target, Idx ex_subexp, int type)
3146 {
3147   Idx cur_node;
3148   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3149     {
3150       bool ok;
3151 
3152       if (dfa->nodes[cur_node].type == type
3153 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3154 	{
3155 	  if (type == OP_CLOSE_SUBEXP)
3156 	    {
3157 	      ok = re_node_set_insert (dst_nodes, cur_node);
3158 	      if (BE (! ok, 0))
3159 		return REG_ESPACE;
3160 	    }
3161 	  break;
3162 	}
3163       ok = re_node_set_insert (dst_nodes, cur_node);
3164       if (BE (! ok, 0))
3165 	return REG_ESPACE;
3166       if (dfa->edests[cur_node].nelem == 0)
3167 	break;
3168       if (dfa->edests[cur_node].nelem == 2)
3169 	{
3170 	  reg_errcode_t err;
3171 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3172 					      dfa->edests[cur_node].elems[1],
3173 					      ex_subexp, type);
3174 	  if (BE (err != REG_NOERROR, 0))
3175 	    return err;
3176 	}
3177       cur_node = dfa->edests[cur_node].elems[0];
3178     }
3179   return REG_NOERROR;
3180 }
3181 
3182 
3183 /* For all the back references in the current state, calculate the
3184    destination of the back references by the appropriate entry
3185    in MCTX->BKREF_ENTS.  */
3186 
3187 static reg_errcode_t
3188 __attribute_warn_unused_result__
3189 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3190 		    Idx cur_str, Idx subexp_num, int type)
3191 {
3192   const re_dfa_t *const dfa = mctx->dfa;
3193   reg_errcode_t err;
3194   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3195   struct re_backref_cache_entry *ent;
3196 
3197   if (cache_idx_start == -1)
3198     return REG_NOERROR;
3199 
3200  restart:
3201   ent = mctx->bkref_ents + cache_idx_start;
3202   do
3203     {
3204       Idx to_idx, next_node;
3205 
3206       /* Is this entry ENT is appropriate?  */
3207       if (!re_node_set_contains (cur_nodes, ent->node))
3208 	continue; /* No.  */
3209 
3210       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3211       /* Calculate the destination of the back reference, and append it
3212 	 to MCTX->STATE_LOG.  */
3213       if (to_idx == cur_str)
3214 	{
3215 	  /* The backreference did epsilon transit, we must re-check all the
3216 	     node in the current state.  */
3217 	  re_node_set new_dests;
3218 	  reg_errcode_t err2, err3;
3219 	  next_node = dfa->edests[ent->node].elems[0];
3220 	  if (re_node_set_contains (cur_nodes, next_node))
3221 	    continue;
3222 	  err = re_node_set_init_1 (&new_dests, next_node);
3223 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3224 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3225 	  re_node_set_free (&new_dests);
3226 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3227 		  || err3 != REG_NOERROR, 0))
3228 	    {
3229 	      err = (err != REG_NOERROR ? err
3230 		     : (err2 != REG_NOERROR ? err2 : err3));
3231 	      return err;
3232 	    }
3233 	  /* TODO: It is still inefficient...  */
3234 	  goto restart;
3235 	}
3236       else
3237 	{
3238 	  re_node_set union_set;
3239 	  next_node = dfa->nexts[ent->node];
3240 	  if (mctx->state_log[to_idx])
3241 	    {
3242 	      bool ok;
3243 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3244 					next_node))
3245 		continue;
3246 	      err = re_node_set_init_copy (&union_set,
3247 					   &mctx->state_log[to_idx]->nodes);
3248 	      ok = re_node_set_insert (&union_set, next_node);
3249 	      if (BE (err != REG_NOERROR || ! ok, 0))
3250 		{
3251 		  re_node_set_free (&union_set);
3252 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3253 		  return err;
3254 		}
3255 	    }
3256 	  else
3257 	    {
3258 	      err = re_node_set_init_1 (&union_set, next_node);
3259 	      if (BE (err != REG_NOERROR, 0))
3260 		return err;
3261 	    }
3262 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3263 	  re_node_set_free (&union_set);
3264 	  if (BE (mctx->state_log[to_idx] == NULL
3265 		  && err != REG_NOERROR, 0))
3266 	    return err;
3267 	}
3268     }
3269   while (ent++->more);
3270   return REG_NOERROR;
3271 }
3272 
3273 /* Build transition table for the state.
3274    Return true if successful.  */
3275 
3276 static bool
3277 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3278 {
3279   reg_errcode_t err;
3280   Idx i, j;
3281   int ch;
3282   bool need_word_trtable = false;
3283   bitset_word_t elem, mask;
3284   bool dests_node_malloced = false;
3285   bool dest_states_malloced = false;
3286   Idx ndests; /* Number of the destination states from 'state'.  */
3287   re_dfastate_t **trtable;
3288   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3289   re_node_set follows, *dests_node;
3290   bitset_t *dests_ch;
3291   bitset_t acceptable;
3292 
3293   struct dests_alloc
3294   {
3295     re_node_set dests_node[SBC_MAX];
3296     bitset_t dests_ch[SBC_MAX];
3297   } *dests_alloc;
3298 
3299   /* We build DFA states which corresponds to the destination nodes
3300      from 'state'.  'dests_node[i]' represents the nodes which i-th
3301      destination state contains, and 'dests_ch[i]' represents the
3302      characters which i-th destination state accepts.  */
3303   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3304     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3305   else
3306     {
3307       dests_alloc = re_malloc (struct dests_alloc, 1);
3308       if (BE (dests_alloc == NULL, 0))
3309 	return false;
3310       dests_node_malloced = true;
3311     }
3312   dests_node = dests_alloc->dests_node;
3313   dests_ch = dests_alloc->dests_ch;
3314 
3315   /* Initialize transition table.  */
3316   state->word_trtable = state->trtable = NULL;
3317 
3318   /* At first, group all nodes belonging to 'state' into several
3319      destinations.  */
3320   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3321   if (BE (ndests <= 0, 0))
3322     {
3323       if (dests_node_malloced)
3324 	re_free (dests_alloc);
3325       /* Return false in case of an error, true otherwise.  */
3326       if (ndests == 0)
3327 	{
3328 	  state->trtable = (re_dfastate_t **)
3329 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3330           if (BE (state->trtable == NULL, 0))
3331             return false;
3332 	  return true;
3333 	}
3334       return false;
3335     }
3336 
3337   err = re_node_set_alloc (&follows, ndests + 1);
3338   if (BE (err != REG_NOERROR, 0))
3339     goto out_free;
3340 
3341   /* Avoid arithmetic overflow in size calculation.  */
3342   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3343 	    / (3 * sizeof (re_dfastate_t *)))
3344 	   < ndests),
3345 	  0))
3346     goto out_free;
3347 
3348   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3349 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3350     dest_states = (re_dfastate_t **)
3351       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3352   else
3353     {
3354       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3355       if (BE (dest_states == NULL, 0))
3356 	{
3357 out_free:
3358 	  if (dest_states_malloced)
3359 	    re_free (dest_states);
3360 	  re_node_set_free (&follows);
3361 	  for (i = 0; i < ndests; ++i)
3362 	    re_node_set_free (dests_node + i);
3363 	  if (dests_node_malloced)
3364 	    re_free (dests_alloc);
3365 	  return false;
3366 	}
3367       dest_states_malloced = true;
3368     }
3369   dest_states_word = dest_states + ndests;
3370   dest_states_nl = dest_states_word + ndests;
3371   bitset_empty (acceptable);
3372 
3373   /* Then build the states for all destinations.  */
3374   for (i = 0; i < ndests; ++i)
3375     {
3376       Idx next_node;
3377       re_node_set_empty (&follows);
3378       /* Merge the follows of this destination states.  */
3379       for (j = 0; j < dests_node[i].nelem; ++j)
3380 	{
3381 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3382 	  if (next_node != -1)
3383 	    {
3384 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3385 	      if (BE (err != REG_NOERROR, 0))
3386 		goto out_free;
3387 	    }
3388 	}
3389       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3390       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3391 	goto out_free;
3392       /* If the new state has context constraint,
3393 	 build appropriate states for these contexts.  */
3394       if (dest_states[i]->has_constraint)
3395 	{
3396 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3397 							  CONTEXT_WORD);
3398 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3399 	    goto out_free;
3400 
3401 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3402 	    need_word_trtable = true;
3403 
3404 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3405 							CONTEXT_NEWLINE);
3406 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3407 	    goto out_free;
3408 	}
3409       else
3410 	{
3411 	  dest_states_word[i] = dest_states[i];
3412 	  dest_states_nl[i] = dest_states[i];
3413 	}
3414       bitset_merge (acceptable, dests_ch[i]);
3415     }
3416 
3417   if (!BE (need_word_trtable, 0))
3418     {
3419       /* We don't care about whether the following character is a word
3420 	 character, or we are in a single-byte character set so we can
3421 	 discern by looking at the character code: allocate a
3422 	 256-entry transition table.  */
3423       trtable = state->trtable =
3424 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3425       if (BE (trtable == NULL, 0))
3426 	goto out_free;
3427 
3428       /* For all characters ch...:  */
3429       for (i = 0; i < BITSET_WORDS; ++i)
3430 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3431 	     elem;
3432 	     mask <<= 1, elem >>= 1, ++ch)
3433 	  if (BE (elem & 1, 0))
3434 	    {
3435 	      /* There must be exactly one destination which accepts
3436 		 character ch.  See group_nodes_into_DFAstates.  */
3437 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3438 		;
3439 
3440 	      /* j-th destination accepts the word character ch.  */
3441 	      if (dfa->word_char[i] & mask)
3442 		trtable[ch] = dest_states_word[j];
3443 	      else
3444 		trtable[ch] = dest_states[j];
3445 	    }
3446     }
3447   else
3448     {
3449       /* We care about whether the following character is a word
3450 	 character, and we are in a multi-byte character set: discern
3451 	 by looking at the character code: build two 256-entry
3452 	 transition tables, one starting at trtable[0] and one
3453 	 starting at trtable[SBC_MAX].  */
3454       trtable = state->word_trtable =
3455 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3456       if (BE (trtable == NULL, 0))
3457 	goto out_free;
3458 
3459       /* For all characters ch...:  */
3460       for (i = 0; i < BITSET_WORDS; ++i)
3461 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3462 	     elem;
3463 	     mask <<= 1, elem >>= 1, ++ch)
3464 	  if (BE (elem & 1, 0))
3465 	    {
3466 	      /* There must be exactly one destination which accepts
3467 		 character ch.  See group_nodes_into_DFAstates.  */
3468 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3469 		;
3470 
3471 	      /* j-th destination accepts the word character ch.  */
3472 	      trtable[ch] = dest_states[j];
3473 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3474 	    }
3475     }
3476 
3477   /* new line */
3478   if (bitset_contain (acceptable, NEWLINE_CHAR))
3479     {
3480       /* The current state accepts newline character.  */
3481       for (j = 0; j < ndests; ++j)
3482 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3483 	  {
3484 	    /* k-th destination accepts newline character.  */
3485 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3486 	    if (need_word_trtable)
3487 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3488 	    /* There must be only one destination which accepts
3489 	       newline.  See group_nodes_into_DFAstates.  */
3490 	    break;
3491 	  }
3492     }
3493 
3494   if (dest_states_malloced)
3495     re_free (dest_states);
3496 
3497   re_node_set_free (&follows);
3498   for (i = 0; i < ndests; ++i)
3499     re_node_set_free (dests_node + i);
3500 
3501   if (dests_node_malloced)
3502     re_free (dests_alloc);
3503 
3504   return true;
3505 }
3506 
3507 /* Group all nodes belonging to STATE into several destinations.
3508    Then for all destinations, set the nodes belonging to the destination
3509    to DESTS_NODE[i] and set the characters accepted by the destination
3510    to DEST_CH[i].  This function return the number of destinations.  */
3511 
3512 static Idx
3513 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3514 			    re_node_set *dests_node, bitset_t *dests_ch)
3515 {
3516   reg_errcode_t err;
3517   bool ok;
3518   Idx i, j, k;
3519   Idx ndests; /* Number of the destinations from 'state'.  */
3520   bitset_t accepts; /* Characters a node can accept.  */
3521   const re_node_set *cur_nodes = &state->nodes;
3522   bitset_empty (accepts);
3523   ndests = 0;
3524 
3525   /* For all the nodes belonging to 'state',  */
3526   for (i = 0; i < cur_nodes->nelem; ++i)
3527     {
3528       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3529       re_token_type_t type = node->type;
3530       unsigned int constraint = node->constraint;
3531 
3532       /* Enumerate all single byte character this node can accept.  */
3533       if (type == CHARACTER)
3534 	bitset_set (accepts, node->opr.c);
3535       else if (type == SIMPLE_BRACKET)
3536 	{
3537 	  bitset_merge (accepts, node->opr.sbcset);
3538 	}
3539       else if (type == OP_PERIOD)
3540 	{
3541 #ifdef RE_ENABLE_I18N
3542 	  if (dfa->mb_cur_max > 1)
3543 	    bitset_merge (accepts, dfa->sb_char);
3544 	  else
3545 #endif
3546 	    bitset_set_all (accepts);
3547 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3548 	    bitset_clear (accepts, '\n');
3549 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3550 	    bitset_clear (accepts, '\0');
3551 	}
3552 #ifdef RE_ENABLE_I18N
3553       else if (type == OP_UTF8_PERIOD)
3554 	{
3555 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3556 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3557 	  else
3558 	    bitset_merge (accepts, utf8_sb_map);
3559 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3560 	    bitset_clear (accepts, '\n');
3561 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3562 	    bitset_clear (accepts, '\0');
3563 	}
3564 #endif
3565       else
3566 	continue;
3567 
3568       /* Check the 'accepts' and sift the characters which are not
3569 	 match it the context.  */
3570       if (constraint)
3571 	{
3572 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3573 	    {
3574 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3575 	      bitset_empty (accepts);
3576 	      if (accepts_newline)
3577 		bitset_set (accepts, NEWLINE_CHAR);
3578 	      else
3579 		continue;
3580 	    }
3581 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3582 	    {
3583 	      bitset_empty (accepts);
3584 	      continue;
3585 	    }
3586 
3587 	  if (constraint & NEXT_WORD_CONSTRAINT)
3588 	    {
3589 	      bitset_word_t any_set = 0;
3590 	      if (type == CHARACTER && !node->word_char)
3591 		{
3592 		  bitset_empty (accepts);
3593 		  continue;
3594 		}
3595 #ifdef RE_ENABLE_I18N
3596 	      if (dfa->mb_cur_max > 1)
3597 		for (j = 0; j < BITSET_WORDS; ++j)
3598 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3599 	      else
3600 #endif
3601 		for (j = 0; j < BITSET_WORDS; ++j)
3602 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3603 	      if (!any_set)
3604 		continue;
3605 	    }
3606 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3607 	    {
3608 	      bitset_word_t any_set = 0;
3609 	      if (type == CHARACTER && node->word_char)
3610 		{
3611 		  bitset_empty (accepts);
3612 		  continue;
3613 		}
3614 #ifdef RE_ENABLE_I18N
3615 	      if (dfa->mb_cur_max > 1)
3616 		for (j = 0; j < BITSET_WORDS; ++j)
3617 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3618 	      else
3619 #endif
3620 		for (j = 0; j < BITSET_WORDS; ++j)
3621 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3622 	      if (!any_set)
3623 		continue;
3624 	    }
3625 	}
3626 
3627       /* Then divide 'accepts' into DFA states, or create a new
3628 	 state.  Above, we make sure that accepts is not empty.  */
3629       for (j = 0; j < ndests; ++j)
3630 	{
3631 	  bitset_t intersec; /* Intersection sets, see below.  */
3632 	  bitset_t remains;
3633 	  /* Flags, see below.  */
3634 	  bitset_word_t has_intersec, not_subset, not_consumed;
3635 
3636 	  /* Optimization, skip if this state doesn't accept the character.  */
3637 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3638 	    continue;
3639 
3640 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3641 	  has_intersec = 0;
3642 	  for (k = 0; k < BITSET_WORDS; ++k)
3643 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3644 	  /* And skip if the intersection set is empty.  */
3645 	  if (!has_intersec)
3646 	    continue;
3647 
3648 	  /* Then check if this state is a subset of 'accepts'.  */
3649 	  not_subset = not_consumed = 0;
3650 	  for (k = 0; k < BITSET_WORDS; ++k)
3651 	    {
3652 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3653 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3654 	    }
3655 
3656 	  /* If this state isn't a subset of 'accepts', create a
3657 	     new group state, which has the 'remains'. */
3658 	  if (not_subset)
3659 	    {
3660 	      bitset_copy (dests_ch[ndests], remains);
3661 	      bitset_copy (dests_ch[j], intersec);
3662 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3663 	      if (BE (err != REG_NOERROR, 0))
3664 		goto error_return;
3665 	      ++ndests;
3666 	    }
3667 
3668 	  /* Put the position in the current group. */
3669 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3670 	  if (BE (! ok, 0))
3671 	    goto error_return;
3672 
3673 	  /* If all characters are consumed, go to next node. */
3674 	  if (!not_consumed)
3675 	    break;
3676 	}
3677       /* Some characters remain, create a new group. */
3678       if (j == ndests)
3679 	{
3680 	  bitset_copy (dests_ch[ndests], accepts);
3681 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3682 	  if (BE (err != REG_NOERROR, 0))
3683 	    goto error_return;
3684 	  ++ndests;
3685 	  bitset_empty (accepts);
3686 	}
3687     }
3688   return ndests;
3689  error_return:
3690   for (j = 0; j < ndests; ++j)
3691     re_node_set_free (dests_node + j);
3692   return -1;
3693 }
3694 
3695 #ifdef RE_ENABLE_I18N
3696 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3697    Return the number of the bytes the node accepts.
3698    STR_IDX is the current index of the input string.
3699 
3700    This function handles the nodes which can accept one character, or
3701    one collating element like '.', '[a-z]', opposite to the other nodes
3702    can only accept one byte.  */
3703 
3704 # ifdef _LIBC
3705 #  include <locale/weight.h>
3706 # endif
3707 
3708 static int
3709 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3710 			 const re_string_t *input, Idx str_idx)
3711 {
3712   const re_token_t *node = dfa->nodes + node_idx;
3713   int char_len, elem_len;
3714   Idx i;
3715 
3716   if (BE (node->type == OP_UTF8_PERIOD, 0))
3717     {
3718       unsigned char c = re_string_byte_at (input, str_idx), d;
3719       if (BE (c < 0xc2, 1))
3720 	return 0;
3721 
3722       if (str_idx + 2 > input->len)
3723 	return 0;
3724 
3725       d = re_string_byte_at (input, str_idx + 1);
3726       if (c < 0xe0)
3727 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3728       else if (c < 0xf0)
3729 	{
3730 	  char_len = 3;
3731 	  if (c == 0xe0 && d < 0xa0)
3732 	    return 0;
3733 	}
3734       else if (c < 0xf8)
3735 	{
3736 	  char_len = 4;
3737 	  if (c == 0xf0 && d < 0x90)
3738 	    return 0;
3739 	}
3740       else if (c < 0xfc)
3741 	{
3742 	  char_len = 5;
3743 	  if (c == 0xf8 && d < 0x88)
3744 	    return 0;
3745 	}
3746       else if (c < 0xfe)
3747 	{
3748 	  char_len = 6;
3749 	  if (c == 0xfc && d < 0x84)
3750 	    return 0;
3751 	}
3752       else
3753 	return 0;
3754 
3755       if (str_idx + char_len > input->len)
3756 	return 0;
3757 
3758       for (i = 1; i < char_len; ++i)
3759 	{
3760 	  d = re_string_byte_at (input, str_idx + i);
3761 	  if (d < 0x80 || d > 0xbf)
3762 	    return 0;
3763 	}
3764       return char_len;
3765     }
3766 
3767   char_len = re_string_char_size_at (input, str_idx);
3768   if (node->type == OP_PERIOD)
3769     {
3770       if (char_len <= 1)
3771 	return 0;
3772       /* FIXME: I don't think this if is needed, as both '\n'
3773 	 and '\0' are char_len == 1.  */
3774       /* '.' accepts any one character except the following two cases.  */
3775       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3776 	   re_string_byte_at (input, str_idx) == '\n') ||
3777 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3778 	   re_string_byte_at (input, str_idx) == '\0'))
3779 	return 0;
3780       return char_len;
3781     }
3782 
3783   elem_len = re_string_elem_size_at (input, str_idx);
3784   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3785     return 0;
3786 
3787   if (node->type == COMPLEX_BRACKET)
3788     {
3789       const re_charset_t *cset = node->opr.mbcset;
3790 # ifdef _LIBC
3791       const unsigned char *pin
3792 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3793       Idx j;
3794       uint32_t nrules;
3795 # endif /* _LIBC */
3796       int match_len = 0;
3797       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3798 		    ? re_string_wchar_at (input, str_idx) : 0);
3799 
3800       /* match with multibyte character?  */
3801       for (i = 0; i < cset->nmbchars; ++i)
3802 	if (wc == cset->mbchars[i])
3803 	  {
3804 	    match_len = char_len;
3805 	    goto check_node_accept_bytes_match;
3806 	  }
3807       /* match with character_class?  */
3808       for (i = 0; i < cset->nchar_classes; ++i)
3809 	{
3810 	  wctype_t wt = cset->char_classes[i];
3811 	  if (__iswctype (wc, wt))
3812 	    {
3813 	      match_len = char_len;
3814 	      goto check_node_accept_bytes_match;
3815 	    }
3816 	}
3817 
3818 # ifdef _LIBC
3819       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3820       if (nrules != 0)
3821 	{
3822 	  unsigned int in_collseq = 0;
3823 	  const int32_t *table, *indirect;
3824 	  const unsigned char *weights, *extra;
3825 	  const char *collseqwc;
3826 
3827 	  /* match with collating_symbol?  */
3828 	  if (cset->ncoll_syms)
3829 	    extra = (const unsigned char *)
3830 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3831 	  for (i = 0; i < cset->ncoll_syms; ++i)
3832 	    {
3833 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3834 	      /* Compare the length of input collating element and
3835 		 the length of current collating element.  */
3836 	      if (*coll_sym != elem_len)
3837 		continue;
3838 	      /* Compare each bytes.  */
3839 	      for (j = 0; j < *coll_sym; j++)
3840 		if (pin[j] != coll_sym[1 + j])
3841 		  break;
3842 	      if (j == *coll_sym)
3843 		{
3844 		  /* Match if every bytes is equal.  */
3845 		  match_len = j;
3846 		  goto check_node_accept_bytes_match;
3847 		}
3848 	    }
3849 
3850 	  if (cset->nranges)
3851 	    {
3852 	      if (elem_len <= char_len)
3853 		{
3854 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3855 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3856 		}
3857 	      else
3858 		in_collseq = find_collation_sequence_value (pin, elem_len);
3859 	    }
3860 	  /* match with range expression?  */
3861 	  /* FIXME: Implement rational ranges here, too.  */
3862 	  for (i = 0; i < cset->nranges; ++i)
3863 	    if (cset->range_starts[i] <= in_collseq
3864 		&& in_collseq <= cset->range_ends[i])
3865 	      {
3866 		match_len = elem_len;
3867 		goto check_node_accept_bytes_match;
3868 	      }
3869 
3870 	  /* match with equivalence_class?  */
3871 	  if (cset->nequiv_classes)
3872 	    {
3873 	      const unsigned char *cp = pin;
3874 	      table = (const int32_t *)
3875 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3876 	      weights = (const unsigned char *)
3877 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3878 	      extra = (const unsigned char *)
3879 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3880 	      indirect = (const int32_t *)
3881 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3882 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3883 	      int32_t rule = idx >> 24;
3884 	      idx &= 0xffffff;
3885 	      if (idx > 0)
3886 		{
3887 		  size_t weight_len = weights[idx];
3888 		  for (i = 0; i < cset->nequiv_classes; ++i)
3889 		    {
3890 		      int32_t equiv_class_idx = cset->equiv_classes[i];
3891 		      int32_t equiv_class_rule = equiv_class_idx >> 24;
3892 		      equiv_class_idx &= 0xffffff;
3893 		      if (weights[equiv_class_idx] == weight_len
3894 			  && equiv_class_rule == rule
3895 			  && memcmp (weights + idx + 1,
3896 				     weights + equiv_class_idx + 1,
3897 				     weight_len) == 0)
3898 			{
3899 			  match_len = elem_len;
3900 			  goto check_node_accept_bytes_match;
3901 			}
3902 		    }
3903 		}
3904 	    }
3905 	}
3906       else
3907 # endif /* _LIBC */
3908 	{
3909 	  /* match with range expression?  */
3910 	  for (i = 0; i < cset->nranges; ++i)
3911 	    {
3912 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3913 		{
3914 		  match_len = char_len;
3915 		  goto check_node_accept_bytes_match;
3916 		}
3917 	    }
3918 	}
3919     check_node_accept_bytes_match:
3920       if (!cset->non_match)
3921 	return match_len;
3922       else
3923 	{
3924 	  if (match_len > 0)
3925 	    return 0;
3926 	  else
3927 	    return (elem_len > char_len) ? elem_len : char_len;
3928 	}
3929     }
3930   return 0;
3931 }
3932 
3933 # ifdef _LIBC
3934 static unsigned int
3935 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3936 {
3937   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3938   if (nrules == 0)
3939     {
3940       if (mbs_len == 1)
3941 	{
3942 	  /* No valid character.  Match it as a single byte character.  */
3943 	  const unsigned char *collseq = (const unsigned char *)
3944 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3945 	  return collseq[mbs[0]];
3946 	}
3947       return UINT_MAX;
3948     }
3949   else
3950     {
3951       int32_t idx;
3952       const unsigned char *extra = (const unsigned char *)
3953 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3954       int32_t extrasize = (const unsigned char *)
3955 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3956 
3957       for (idx = 0; idx < extrasize;)
3958 	{
3959 	  int mbs_cnt;
3960 	  bool found = false;
3961 	  int32_t elem_mbs_len;
3962 	  /* Skip the name of collating element name.  */
3963 	  idx = idx + extra[idx] + 1;
3964 	  elem_mbs_len = extra[idx++];
3965 	  if (mbs_len == elem_mbs_len)
3966 	    {
3967 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3968 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3969 		  break;
3970 	      if (mbs_cnt == elem_mbs_len)
3971 		/* Found the entry.  */
3972 		found = true;
3973 	    }
3974 	  /* Skip the byte sequence of the collating element.  */
3975 	  idx += elem_mbs_len;
3976 	  /* Adjust for the alignment.  */
3977 	  idx = (idx + 3) & ~3;
3978 	  /* Skip the collation sequence value.  */
3979 	  idx += sizeof (uint32_t);
3980 	  /* Skip the wide char sequence of the collating element.  */
3981 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3982 	  /* If we found the entry, return the sequence value.  */
3983 	  if (found)
3984 	    return *(uint32_t *) (extra + idx);
3985 	  /* Skip the collation sequence value.  */
3986 	  idx += sizeof (uint32_t);
3987 	}
3988       return UINT_MAX;
3989     }
3990 }
3991 # endif /* _LIBC */
3992 #endif /* RE_ENABLE_I18N */
3993 
3994 /* Check whether the node accepts the byte which is IDX-th
3995    byte of the INPUT.  */
3996 
3997 static bool
3998 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3999 		   Idx idx)
4000 {
4001   unsigned char ch;
4002   ch = re_string_byte_at (&mctx->input, idx);
4003   switch (node->type)
4004     {
4005     case CHARACTER:
4006       if (node->opr.c != ch)
4007         return false;
4008       break;
4009 
4010     case SIMPLE_BRACKET:
4011       if (!bitset_contain (node->opr.sbcset, ch))
4012         return false;
4013       break;
4014 
4015 #ifdef RE_ENABLE_I18N
4016     case OP_UTF8_PERIOD:
4017       if (ch >= ASCII_CHARS)
4018         return false;
4019       FALLTHROUGH;
4020 #endif
4021     case OP_PERIOD:
4022       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4023 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4024 	return false;
4025       break;
4026 
4027     default:
4028       return false;
4029     }
4030 
4031   if (node->constraint)
4032     {
4033       /* The node has constraints.  Check whether the current context
4034 	 satisfies the constraints.  */
4035       unsigned int context = re_string_context_at (&mctx->input, idx,
4036 						   mctx->eflags);
4037       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4038 	return false;
4039     }
4040 
4041   return true;
4042 }
4043 
4044 /* Extend the buffers, if the buffers have run out.  */
4045 
4046 static reg_errcode_t
4047 __attribute_warn_unused_result__
4048 extend_buffers (re_match_context_t *mctx, int min_len)
4049 {
4050   reg_errcode_t ret;
4051   re_string_t *pstr = &mctx->input;
4052 
4053   /* Avoid overflow.  */
4054   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4055           <= pstr->bufs_len, 0))
4056     return REG_ESPACE;
4057 
4058   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4059   ret = re_string_realloc_buffers (pstr,
4060 				   MAX (min_len,
4061 					MIN (pstr->len, pstr->bufs_len * 2)));
4062   if (BE (ret != REG_NOERROR, 0))
4063     return ret;
4064 
4065   if (mctx->state_log != NULL)
4066     {
4067       /* And double the length of state_log.  */
4068       /* XXX We have no indication of the size of this buffer.  If this
4069 	 allocation fail we have no indication that the state_log array
4070 	 does not have the right size.  */
4071       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4072 					      pstr->bufs_len + 1);
4073       if (BE (new_array == NULL, 0))
4074 	return REG_ESPACE;
4075       mctx->state_log = new_array;
4076     }
4077 
4078   /* Then reconstruct the buffers.  */
4079   if (pstr->icase)
4080     {
4081 #ifdef RE_ENABLE_I18N
4082       if (pstr->mb_cur_max > 1)
4083 	{
4084 	  ret = build_wcs_upper_buffer (pstr);
4085 	  if (BE (ret != REG_NOERROR, 0))
4086 	    return ret;
4087 	}
4088       else
4089 #endif /* RE_ENABLE_I18N  */
4090 	build_upper_buffer (pstr);
4091     }
4092   else
4093     {
4094 #ifdef RE_ENABLE_I18N
4095       if (pstr->mb_cur_max > 1)
4096 	build_wcs_buffer (pstr);
4097       else
4098 #endif /* RE_ENABLE_I18N  */
4099 	{
4100 	  if (pstr->trans != NULL)
4101 	    re_string_translate_buffer (pstr);
4102 	}
4103     }
4104   return REG_NOERROR;
4105 }
4106 
4107 
4108 /* Functions for matching context.  */
4109 
4110 /* Initialize MCTX.  */
4111 
4112 static reg_errcode_t
4113 __attribute_warn_unused_result__
4114 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4115 {
4116   mctx->eflags = eflags;
4117   mctx->match_last = -1;
4118   if (n > 0)
4119     {
4120       /* Avoid overflow.  */
4121       size_t max_object_size =
4122 	MAX (sizeof (struct re_backref_cache_entry),
4123 	     sizeof (re_sub_match_top_t *));
4124       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4125 	return REG_ESPACE;
4126 
4127       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4128       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4129       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4130 	return REG_ESPACE;
4131     }
4132   /* Already zero-ed by the caller.
4133      else
4134        mctx->bkref_ents = NULL;
4135      mctx->nbkref_ents = 0;
4136      mctx->nsub_tops = 0;  */
4137   mctx->abkref_ents = n;
4138   mctx->max_mb_elem_len = 1;
4139   mctx->asub_tops = n;
4140   return REG_NOERROR;
4141 }
4142 
4143 /* Clean the entries which depend on the current input in MCTX.
4144    This function must be invoked when the matcher changes the start index
4145    of the input, or changes the input string.  */
4146 
4147 static void
4148 match_ctx_clean (re_match_context_t *mctx)
4149 {
4150   Idx st_idx;
4151   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4152     {
4153       Idx sl_idx;
4154       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4155       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4156 	{
4157 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4158 	  re_free (last->path.array);
4159 	  re_free (last);
4160 	}
4161       re_free (top->lasts);
4162       if (top->path)
4163 	{
4164 	  re_free (top->path->array);
4165 	  re_free (top->path);
4166 	}
4167       re_free (top);
4168     }
4169 
4170   mctx->nsub_tops = 0;
4171   mctx->nbkref_ents = 0;
4172 }
4173 
4174 /* Free all the memory associated with MCTX.  */
4175 
4176 static void
4177 match_ctx_free (re_match_context_t *mctx)
4178 {
4179   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4180   match_ctx_clean (mctx);
4181   re_free (mctx->sub_tops);
4182   re_free (mctx->bkref_ents);
4183 }
4184 
4185 /* Add a new backreference entry to MCTX.
4186    Note that we assume that caller never call this function with duplicate
4187    entry, and call with STR_IDX which isn't smaller than any existing entry.
4188 */
4189 
4190 static reg_errcode_t
4191 __attribute_warn_unused_result__
4192 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4193 		     Idx to)
4194 {
4195   if (mctx->nbkref_ents >= mctx->abkref_ents)
4196     {
4197       struct re_backref_cache_entry* new_entry;
4198       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4199 			      mctx->abkref_ents * 2);
4200       if (BE (new_entry == NULL, 0))
4201 	{
4202 	  re_free (mctx->bkref_ents);
4203 	  return REG_ESPACE;
4204 	}
4205       mctx->bkref_ents = new_entry;
4206       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4207 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4208       mctx->abkref_ents *= 2;
4209     }
4210   if (mctx->nbkref_ents > 0
4211       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4212     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4213 
4214   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4215   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4216   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4217   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4218 
4219   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4220      If bit N is clear, means that this entry won't epsilon-transition to
4221      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4222      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4223      such node.
4224 
4225      A backreference does not epsilon-transition unless it is empty, so set
4226      to all zeros if FROM != TO.  */
4227   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4228     = (from == to ? -1 : 0);
4229 
4230   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4231   if (mctx->max_mb_elem_len < to - from)
4232     mctx->max_mb_elem_len = to - from;
4233   return REG_NOERROR;
4234 }
4235 
4236 /* Return the first entry with the same str_idx, or -1 if none is
4237    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4238 
4239 static Idx
4240 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4241 {
4242   Idx left, right, mid, last;
4243   last = right = mctx->nbkref_ents;
4244   for (left = 0; left < right;)
4245     {
4246       mid = (left + right) / 2;
4247       if (mctx->bkref_ents[mid].str_idx < str_idx)
4248 	left = mid + 1;
4249       else
4250 	right = mid;
4251     }
4252   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4253     return left;
4254   else
4255     return -1;
4256 }
4257 
4258 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4259    at STR_IDX.  */
4260 
4261 static reg_errcode_t
4262 __attribute_warn_unused_result__
4263 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4264 {
4265 #ifdef DEBUG
4266   assert (mctx->sub_tops != NULL);
4267   assert (mctx->asub_tops > 0);
4268 #endif
4269   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4270     {
4271       Idx new_asub_tops = mctx->asub_tops * 2;
4272       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4273 						   re_sub_match_top_t *,
4274 						   new_asub_tops);
4275       if (BE (new_array == NULL, 0))
4276 	return REG_ESPACE;
4277       mctx->sub_tops = new_array;
4278       mctx->asub_tops = new_asub_tops;
4279     }
4280   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4281   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4282     return REG_ESPACE;
4283   mctx->sub_tops[mctx->nsub_tops]->node = node;
4284   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4285   return REG_NOERROR;
4286 }
4287 
4288 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4289    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4290 
4291 static re_sub_match_last_t *
4292 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4293 {
4294   re_sub_match_last_t *new_entry;
4295   if (BE (subtop->nlasts == subtop->alasts, 0))
4296     {
4297       Idx new_alasts = 2 * subtop->alasts + 1;
4298       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4299 						    re_sub_match_last_t *,
4300 						    new_alasts);
4301       if (BE (new_array == NULL, 0))
4302 	return NULL;
4303       subtop->lasts = new_array;
4304       subtop->alasts = new_alasts;
4305     }
4306   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4307   if (BE (new_entry != NULL, 1))
4308     {
4309       subtop->lasts[subtop->nlasts] = new_entry;
4310       new_entry->node = node;
4311       new_entry->str_idx = str_idx;
4312       ++subtop->nlasts;
4313     }
4314   return new_entry;
4315 }
4316 
4317 static void
4318 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4319 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4320 {
4321   sctx->sifted_states = sifted_sts;
4322   sctx->limited_states = limited_sts;
4323   sctx->last_node = last_node;
4324   sctx->last_str_idx = last_str_idx;
4325   re_node_set_init_empty (&sctx->limits);
4326 }
4327