regex_automata/meta/
strategy.rs

1use core::{
2    fmt::Debug,
3    panic::{RefUnwindSafe, UnwindSafe},
4};
5
6use alloc::sync::Arc;
7
8use regex_syntax::hir::{literal, Hir};
9
10use crate::{
11    meta::{
12        error::{BuildError, RetryError, RetryFailError, RetryQuadraticError},
13        regex::{Cache, RegexInfo},
14        reverse_inner, wrappers,
15    },
16    nfa::thompson::{self, WhichCaptures, NFA},
17    util::{
18        captures::{Captures, GroupInfo},
19        look::LookMatcher,
20        prefilter::{self, Prefilter, PrefilterI},
21        primitives::{NonMaxUsize, PatternID},
22        search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet},
23    },
24};
25
26/// A trait that represents a single meta strategy. Its main utility is in
27/// providing a way to do dynamic dispatch over a few choices.
28///
29/// Why dynamic dispatch? I actually don't have a super compelling reason, and
30/// importantly, I have not benchmarked it with the main alternative: an enum.
31/// I went with dynamic dispatch initially because the regex engine search code
32/// really can't be inlined into caller code in most cases because it's just
33/// too big. In other words, it is already expected that every regex search
34/// will entail at least the cost of a function call.
35///
36/// I do wonder whether using enums would result in better codegen overall
37/// though. It's a worthwhile experiment to try. Probably the most interesting
38/// benchmark to run in such a case would be one with a high match count. That
39/// is, a benchmark to test the overall latency of a search call.
40pub(super) trait Strategy:
41    Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
42{
43    fn group_info(&self) -> &GroupInfo;
44
45    fn create_cache(&self) -> Cache;
46
47    fn reset_cache(&self, cache: &mut Cache);
48
49    fn is_accelerated(&self) -> bool;
50
51    fn memory_usage(&self) -> usize;
52
53    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>;
54
55    fn search_half(
56        &self,
57        cache: &mut Cache,
58        input: &Input<'_>,
59    ) -> Option<HalfMatch>;
60
61    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;
62
63    fn search_slots(
64        &self,
65        cache: &mut Cache,
66        input: &Input<'_>,
67        slots: &mut [Option<NonMaxUsize>],
68    ) -> Option<PatternID>;
69
70    fn which_overlapping_matches(
71        &self,
72        cache: &mut Cache,
73        input: &Input<'_>,
74        patset: &mut PatternSet,
75    );
76}
77
78pub(super) fn new(
79    info: &RegexInfo,
80    hirs: &[&Hir],
81) -> Result<Arc<dyn Strategy>, BuildError> {
82    // At this point, we're committed to a regex engine of some kind. So pull
83    // out a prefilter if we can, which will feed to each of the constituent
84    // regex engines.
85    let pre = if info.is_always_anchored_start() {
86        // PERF: I'm not sure we necessarily want to do this... We may want to
87        // run a prefilter for quickly rejecting in some cases. The problem
88        // is that anchored searches overlap quite a bit with the use case
89        // of "run a regex on every line to extract data." In that case, the
90        // regex always matches, so running a prefilter doesn't really help us
91        // there. The main place where a prefilter helps in an anchored search
92        // is if the anchored search is not expected to match frequently. That
93        // is, the prefilter gives us a way to possibly reject a haystack very
94        // quickly.
95        //
96        // Maybe we should do use a prefilter, but only for longer haystacks?
97        // Or maybe we should only use a prefilter when we think it's "fast"?
98        //
99        // Interestingly, I think we currently lack the infrastructure for
100        // disabling a prefilter based on haystack length. That would probably
101        // need to be a new 'Input' option. (Interestingly, an 'Input' used to
102        // carry a 'Prefilter' with it, but I moved away from that.)
103        debug!("skipping literal extraction since regex is anchored");
104        None
105    } else if let Some(pre) = info.config().get_prefilter() {
106        debug!(
107            "skipping literal extraction since the caller provided a prefilter"
108        );
109        Some(pre.clone())
110    } else if info.config().get_auto_prefilter() {
111        let kind = info.config().get_match_kind();
112        let prefixes = crate::util::prefilter::prefixes(kind, hirs);
113        // If we can build a full `Strategy` from just the extracted prefixes,
114        // then we can short-circuit and avoid building a regex engine at all.
115        if let Some(pre) = Pre::from_prefixes(info, &prefixes) {
116            debug!(
117                "found that the regex can be broken down to a literal \
118                 search, avoiding the regex engine entirely",
119            );
120            return Ok(pre);
121        }
122        // This now attempts another short-circuit of the regex engine: if we
123        // have a huge alternation of just plain literals, then we can just use
124        // Aho-Corasick for that and avoid the regex engine entirely.
125        //
126        // You might think this case would just be handled by
127        // `Pre::from_prefixes`, but that technique relies on heuristic literal
128        // extraction from the corresponding `Hir`. That works, but part of
129        // heuristics limit the size and number of literals returned. This case
130        // will specifically handle patterns with very large alternations.
131        //
132        // One wonders if we should just roll this our heuristic literal
133        // extraction, and then I think this case could disappear entirely.
134        if let Some(pre) = Pre::from_alternation_literals(info, hirs) {
135            debug!(
136                "found plain alternation of literals, \
137                 avoiding regex engine entirely and using Aho-Corasick"
138            );
139            return Ok(pre);
140        }
141        prefixes.literals().and_then(|strings| {
142            debug!(
143                "creating prefilter from {} literals: {:?}",
144                strings.len(),
145                strings,
146            );
147            Prefilter::new(kind, strings)
148        })
149    } else {
150        debug!("skipping literal extraction since prefilters were disabled");
151        None
152    };
153    let mut core = Core::new(info.clone(), pre.clone(), hirs)?;
154    // Now that we have our core regex engines built, there are a few cases
155    // where we can do a little bit better than just a normal "search forward
156    // and maybe use a prefilter when in a start state." However, these cases
157    // may not always work or otherwise build on top of the Core searcher.
158    // For example, the reverse anchored optimization seems like it might
159    // always work, but only the DFAs support reverse searching and the DFAs
160    // might give up or quit for reasons. If we had, e.g., a PikeVM that
161    // supported reverse searching, then we could avoid building a full Core
162    // engine for this case.
163    core = match ReverseAnchored::new(core) {
164        Err(core) => core,
165        Ok(ra) => {
166            debug!("using reverse anchored strategy");
167            return Ok(Arc::new(ra));
168        }
169    };
170    core = match ReverseSuffix::new(core, hirs) {
171        Err(core) => core,
172        Ok(rs) => {
173            debug!("using reverse suffix strategy");
174            return Ok(Arc::new(rs));
175        }
176    };
177    core = match ReverseInner::new(core, hirs) {
178        Err(core) => core,
179        Ok(ri) => {
180            debug!("using reverse inner strategy");
181            return Ok(Arc::new(ri));
182        }
183    };
184    debug!("using core strategy");
185    Ok(Arc::new(core))
186}
187
188#[derive(Clone, Debug)]
189struct Pre<P> {
190    pre: P,
191    group_info: GroupInfo,
192}
193
194impl<P: PrefilterI> Pre<P> {
195    fn new(pre: P) -> Arc<dyn Strategy> {
196        // The only thing we support when we use prefilters directly as a
197        // strategy is the start and end of the overall match for a single
198        // pattern. In other words, exactly one implicit capturing group. Which
199        // is exactly what we use here for a GroupInfo.
200        let group_info = GroupInfo::new([[None::<&str>]]).unwrap();
201        Arc::new(Pre { pre, group_info })
202    }
203}
204
205// This is a little weird, but we don't actually care about the type parameter
206// here because we're selecting which underlying prefilter to use. So we just
207// define it on an arbitrary type.
208impl Pre<()> {
209    /// Given a sequence of prefixes, attempt to return a full `Strategy` using
210    /// just the prefixes.
211    ///
212    /// Basically, this occurs when the prefixes given not just prefixes,
213    /// but an enumeration of the entire language matched by the regular
214    /// expression.
215    ///
216    /// A number of other conditions need to be true too. For example, there
217    /// can be only one pattern, the number of explicit capture groups is 0, no
218    /// look-around assertions and so on.
219    ///
220    /// Note that this ignores `Config::get_auto_prefilter` because if this
221    /// returns something, then it isn't a prefilter but a matcher itself.
222    /// Therefore, it shouldn't suffer from the problems typical to prefilters
223    /// (such as a high false positive rate).
224    fn from_prefixes(
225        info: &RegexInfo,
226        prefixes: &literal::Seq,
227    ) -> Option<Arc<dyn Strategy>> {
228        let kind = info.config().get_match_kind();
229        // Check to see if our prefixes are exact, which means we might be
230        // able to bypass the regex engine entirely and just rely on literal
231        // searches.
232        if !prefixes.is_exact() {
233            return None;
234        }
235        // We also require that we have a single regex pattern. Namely,
236        // we reuse the prefilter infrastructure to implement search and
237        // prefilters only report spans. Prefilters don't know about pattern
238        // IDs. The multi-regex case isn't a lost cause, we might still use
239        // Aho-Corasick and we might still just use a regular prefilter, but
240        // that's done below.
241        if info.pattern_len() != 1 {
242            return None;
243        }
244        // We can't have any capture groups either. The literal engines don't
245        // know how to deal with things like '(foo)(bar)'. In that case, a
246        // prefilter will just be used and then the regex engine will resolve
247        // the capture groups.
248        if info.props()[0].explicit_captures_len() != 0 {
249            return None;
250        }
251        // We also require that it has zero look-around assertions. Namely,
252        // literal extraction treats look-around assertions as if they match
253        // *every* empty string. But of course, that isn't true. So for
254        // example, 'foo\bquux' never matches anything, but 'fooquux' is
255        // extracted from that as an exact literal. Such cases should just run
256        // the regex engine. 'fooquux' will be used as a normal prefilter, and
257        // then the regex engine will try to look for an actual match.
258        if !info.props()[0].look_set().is_empty() {
259            return None;
260        }
261        // Finally, currently, our prefilters are all oriented around
262        // leftmost-first match semantics, so don't try to use them if the
263        // caller asked for anything else.
264        if kind != MatchKind::LeftmostFirst {
265            return None;
266        }
267        // The above seems like a lot of requirements to meet, but it applies
268        // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet
269        // the above criteria, for example.
270        //
271        // Note that this is effectively a latency optimization. If we didn't
272        // do this, then the extracted literals would still get bundled into
273        // a prefilter, and every regex engine capable of running unanchored
274        // searches supports prefilters. So this optimization merely sidesteps
275        // having to run the regex engine at all to confirm the match. Thus, it
276        // decreases the latency of a match.
277
278        // OK because we know the set is exact and thus finite.
279        let prefixes = prefixes.literals().unwrap();
280        debug!(
281            "trying to bypass regex engine by creating \
282             prefilter from {} literals: {:?}",
283            prefixes.len(),
284            prefixes,
285        );
286        let choice = match prefilter::Choice::new(kind, prefixes) {
287            Some(choice) => choice,
288            None => {
289                debug!(
290                    "regex bypass failed because no prefilter could be built"
291                );
292                return None;
293            }
294        };
295        let strat: Arc<dyn Strategy> = match choice {
296            prefilter::Choice::Memchr(pre) => Pre::new(pre),
297            prefilter::Choice::Memchr2(pre) => Pre::new(pre),
298            prefilter::Choice::Memchr3(pre) => Pre::new(pre),
299            prefilter::Choice::Memmem(pre) => Pre::new(pre),
300            prefilter::Choice::Teddy(pre) => Pre::new(pre),
301            prefilter::Choice::ByteSet(pre) => Pre::new(pre),
302            prefilter::Choice::AhoCorasick(pre) => Pre::new(pre),
303        };
304        Some(strat)
305    }
306
307    /// Attempts to extract an alternation of literals, and if it's deemed
308    /// worth doing, returns an Aho-Corasick prefilter as a strategy.
309    ///
310    /// And currently, this only returns something when 'hirs.len() == 1'. This
311    /// could in theory do something if there are multiple HIRs where all of
312    /// them are alternation of literals, but I haven't had the time to go down
313    /// that path yet.
314    fn from_alternation_literals(
315        info: &RegexInfo,
316        hirs: &[&Hir],
317    ) -> Option<Arc<dyn Strategy>> {
318        use crate::util::prefilter::AhoCorasick;
319
320        let lits = crate::meta::literal::alternation_literals(info, hirs)?;
321        let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?;
322        Some(Pre::new(ac))
323    }
324}
325
326// This implements Strategy for anything that implements PrefilterI.
327//
328// Note that this must only be used for regexes of length 1. Multi-regexes
329// don't work here. The prefilter interface only provides the span of a match
330// and not the pattern ID. (I did consider making it more expressive, but I
331// couldn't figure out how to tie everything together elegantly.) Thus, so long
332// as the regex only contains one pattern, we can simply assume that a match
333// corresponds to PatternID::ZERO. And indeed, that's what we do here.
334//
335// In practice, since this impl is used to report matches directly and thus
336// completely bypasses the regex engine, we only wind up using this under the
337// following restrictions:
338//
339// * There must be only one pattern. As explained above.
340// * The literal sequence must be finite and only contain exact literals.
341// * There must not be any look-around assertions. If there are, the literals
342// extracted might be exact, but a match doesn't necessarily imply an overall
343// match. As a trivial example, 'foo\bbar' does not match 'foobar'.
344// * The pattern must not have any explicit capturing groups. If it does, the
345// caller might expect them to be resolved. e.g., 'foo(bar)'.
346//
347// So when all of those things are true, we use a prefilter directly as a
348// strategy.
349//
350// In the case where the number of patterns is more than 1, we don't use this
351// but do use a special Aho-Corasick strategy if all of the regexes are just
352// simple literals or alternations of literals. (We also use the Aho-Corasick
353// strategy when len(patterns)==1 if the number of literals is large. In that
354// case, literal extraction gives up and will return an infinite set.)
355impl<P: PrefilterI> Strategy for Pre<P> {
356    #[cfg_attr(feature = "perf-inline", inline(always))]
357    fn group_info(&self) -> &GroupInfo {
358        &self.group_info
359    }
360
361    fn create_cache(&self) -> Cache {
362        Cache {
363            capmatches: Captures::all(self.group_info().clone()),
364            pikevm: wrappers::PikeVMCache::none(),
365            backtrack: wrappers::BoundedBacktrackerCache::none(),
366            onepass: wrappers::OnePassCache::none(),
367            hybrid: wrappers::HybridCache::none(),
368            revhybrid: wrappers::ReverseHybridCache::none(),
369        }
370    }
371
372    fn reset_cache(&self, _cache: &mut Cache) {}
373
374    fn is_accelerated(&self) -> bool {
375        self.pre.is_fast()
376    }
377
378    fn memory_usage(&self) -> usize {
379        self.pre.memory_usage()
380    }
381
382    #[cfg_attr(feature = "perf-inline", inline(always))]
383    fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
384        if input.is_done() {
385            return None;
386        }
387        if input.get_anchored().is_anchored() {
388            return self
389                .pre
390                .prefix(input.haystack(), input.get_span())
391                .map(|sp| Match::new(PatternID::ZERO, sp));
392        }
393        self.pre
394            .find(input.haystack(), input.get_span())
395            .map(|sp| Match::new(PatternID::ZERO, sp))
396    }
397
398    #[cfg_attr(feature = "perf-inline", inline(always))]
399    fn search_half(
400        &self,
401        cache: &mut Cache,
402        input: &Input<'_>,
403    ) -> Option<HalfMatch> {
404        self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
405    }
406
407    #[cfg_attr(feature = "perf-inline", inline(always))]
408    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
409        self.search(cache, input).is_some()
410    }
411
412    #[cfg_attr(feature = "perf-inline", inline(always))]
413    fn search_slots(
414        &self,
415        cache: &mut Cache,
416        input: &Input<'_>,
417        slots: &mut [Option<NonMaxUsize>],
418    ) -> Option<PatternID> {
419        let m = self.search(cache, input)?;
420        if let Some(slot) = slots.get_mut(0) {
421            *slot = NonMaxUsize::new(m.start());
422        }
423        if let Some(slot) = slots.get_mut(1) {
424            *slot = NonMaxUsize::new(m.end());
425        }
426        Some(m.pattern())
427    }
428
429    #[cfg_attr(feature = "perf-inline", inline(always))]
430    fn which_overlapping_matches(
431        &self,
432        cache: &mut Cache,
433        input: &Input<'_>,
434        patset: &mut PatternSet,
435    ) {
436        if self.search(cache, input).is_some() {
437            patset.insert(PatternID::ZERO);
438        }
439    }
440}
441
442#[derive(Debug)]
443struct Core {
444    info: RegexInfo,
445    pre: Option<Prefilter>,
446    nfa: NFA,
447    nfarev: Option<NFA>,
448    pikevm: wrappers::PikeVM,
449    backtrack: wrappers::BoundedBacktracker,
450    onepass: wrappers::OnePass,
451    hybrid: wrappers::Hybrid,
452    dfa: wrappers::DFA,
453}
454
455impl Core {
456    fn new(
457        info: RegexInfo,
458        pre: Option<Prefilter>,
459        hirs: &[&Hir],
460    ) -> Result<Core, BuildError> {
461        let mut lookm = LookMatcher::new();
462        lookm.set_line_terminator(info.config().get_line_terminator());
463        let thompson_config = thompson::Config::new()
464            .utf8(info.config().get_utf8_empty())
465            .nfa_size_limit(info.config().get_nfa_size_limit())
466            .shrink(false)
467            .which_captures(info.config().get_which_captures())
468            .look_matcher(lookm);
469        let nfa = thompson::Compiler::new()
470            .configure(thompson_config.clone())
471            .build_many_from_hir(hirs)
472            .map_err(BuildError::nfa)?;
473        // It's possible for the PikeVM or the BB to fail to build, even though
474        // at this point, we already have a full NFA in hand. They can fail
475        // when a Unicode word boundary is used but where Unicode word boundary
476        // support is disabled at compile time, thus making it impossible to
477        // match. (Construction can also fail if the NFA was compiled without
478        // captures, but we always enable that above.)
479        let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?;
480        let backtrack =
481            wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?;
482        // The onepass engine can of course fail to build, but we expect it to
483        // fail in many cases because it is an optimization that doesn't apply
484        // to all regexes. The 'OnePass' wrapper encapsulates this failure (and
485        // logs a message if it occurs).
486        let onepass = wrappers::OnePass::new(&info, &nfa);
487        // We try to encapsulate whether a particular regex engine should be
488        // used within each respective wrapper, but the DFAs need a reverse NFA
489        // to build itself, and we really do not want to build a reverse NFA if
490        // we know we aren't going to use the lazy DFA. So we do a config check
491        // up front, which is in practice the only way we won't try to use the
492        // DFA.
493        let (nfarev, hybrid, dfa) =
494            if !info.config().get_hybrid() && !info.config().get_dfa() {
495                (None, wrappers::Hybrid::none(), wrappers::DFA::none())
496            } else {
497                // FIXME: Technically, we don't quite yet KNOW that we need
498                // a reverse NFA. It's possible for the DFAs below to both
499                // fail to build just based on the forward NFA. In which case,
500                // building the reverse NFA was totally wasted work. But...
501                // fixing this requires breaking DFA construction apart into
502                // two pieces: one for the forward part and another for the
503                // reverse part. Quite annoying. Making it worse, when building
504                // both DFAs fails, it's quite likely that the NFA is large and
505                // that it will take quite some time to build the reverse NFA
506                // too. So... it's really probably worth it to do this!
507                let nfarev = thompson::Compiler::new()
508                    // Currently, reverse NFAs don't support capturing groups,
509                    // so we MUST disable them. But even if we didn't have to,
510                    // we would, because nothing in this crate does anything
511                    // useful with capturing groups in reverse. And of course,
512                    // the lazy DFA ignores capturing groups in all cases.
513                    .configure(
514                        thompson_config
515                            .clone()
516                            .which_captures(WhichCaptures::None)
517                            .reverse(true),
518                    )
519                    .build_many_from_hir(hirs)
520                    .map_err(BuildError::nfa)?;
521                let dfa = if !info.config().get_dfa() {
522                    wrappers::DFA::none()
523                } else {
524                    wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
525                };
526                let hybrid = if !info.config().get_hybrid() {
527                    wrappers::Hybrid::none()
528                } else if dfa.is_some() {
529                    debug!("skipping lazy DFA because we have a full DFA");
530                    wrappers::Hybrid::none()
531                } else {
532                    wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
533                };
534                (Some(nfarev), hybrid, dfa)
535            };
536        Ok(Core {
537            info,
538            pre,
539            nfa,
540            nfarev,
541            pikevm,
542            backtrack,
543            onepass,
544            hybrid,
545            dfa,
546        })
547    }
548
549    #[cfg_attr(feature = "perf-inline", inline(always))]
550    fn try_search_mayfail(
551        &self,
552        cache: &mut Cache,
553        input: &Input<'_>,
554    ) -> Option<Result<Option<Match>, RetryFailError>> {
555        if let Some(e) = self.dfa.get(input) {
556            trace!("using full DFA for search at {:?}", input.get_span());
557            Some(e.try_search(input))
558        } else if let Some(e) = self.hybrid.get(input) {
559            trace!("using lazy DFA for search at {:?}", input.get_span());
560            Some(e.try_search(&mut cache.hybrid, input))
561        } else {
562            None
563        }
564    }
565
566    fn search_nofail(
567        &self,
568        cache: &mut Cache,
569        input: &Input<'_>,
570    ) -> Option<Match> {
571        let caps = &mut cache.capmatches;
572        caps.set_pattern(None);
573        // We manually inline 'try_search_slots_nofail' here because we need to
574        // borrow from 'cache.capmatches' in this method, but if we do, then
575        // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a
576        // classic example of how the borrow checker inhibits decomposition.
577        // There are of course work-arounds (more types and/or interior
578        // mutability), but that's more annoying than this IMO.
579        let pid = if let Some(ref e) = self.onepass.get(input) {
580            trace!("using OnePass for search at {:?}", input.get_span());
581            e.search_slots(&mut cache.onepass, input, caps.slots_mut())
582        } else if let Some(ref e) = self.backtrack.get(input) {
583            trace!(
584                "using BoundedBacktracker for search at {:?}",
585                input.get_span()
586            );
587            e.search_slots(&mut cache.backtrack, input, caps.slots_mut())
588        } else {
589            trace!("using PikeVM for search at {:?}", input.get_span());
590            let e = self.pikevm.get();
591            e.search_slots(&mut cache.pikevm, input, caps.slots_mut())
592        };
593        caps.set_pattern(pid);
594        caps.get_match()
595    }
596
597    fn search_half_nofail(
598        &self,
599        cache: &mut Cache,
600        input: &Input<'_>,
601    ) -> Option<HalfMatch> {
602        // Only the lazy/full DFA returns half-matches, since the DFA requires
603        // a reverse scan to find the start position. These fallback regex
604        // engines can find the start and end in a single pass, so we just do
605        // that and throw away the start offset to conform to the API.
606        let m = self.search_nofail(cache, input)?;
607        Some(HalfMatch::new(m.pattern(), m.end()))
608    }
609
610    fn search_slots_nofail(
611        &self,
612        cache: &mut Cache,
613        input: &Input<'_>,
614        slots: &mut [Option<NonMaxUsize>],
615    ) -> Option<PatternID> {
616        if let Some(ref e) = self.onepass.get(input) {
617            trace!(
618                "using OnePass for capture search at {:?}",
619                input.get_span()
620            );
621            e.search_slots(&mut cache.onepass, input, slots)
622        } else if let Some(ref e) = self.backtrack.get(input) {
623            trace!(
624                "using BoundedBacktracker for capture search at {:?}",
625                input.get_span()
626            );
627            e.search_slots(&mut cache.backtrack, input, slots)
628        } else {
629            trace!(
630                "using PikeVM for capture search at {:?}",
631                input.get_span()
632            );
633            let e = self.pikevm.get();
634            e.search_slots(&mut cache.pikevm, input, slots)
635        }
636    }
637
638    fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
639        if let Some(ref e) = self.onepass.get(input) {
640            trace!(
641                "using OnePass for is-match search at {:?}",
642                input.get_span()
643            );
644            e.search_slots(&mut cache.onepass, input, &mut []).is_some()
645        } else if let Some(ref e) = self.backtrack.get(input) {
646            trace!(
647                "using BoundedBacktracker for is-match search at {:?}",
648                input.get_span()
649            );
650            e.is_match(&mut cache.backtrack, input)
651        } else {
652            trace!(
653                "using PikeVM for is-match search at {:?}",
654                input.get_span()
655            );
656            let e = self.pikevm.get();
657            e.is_match(&mut cache.pikevm, input)
658        }
659    }
660
661    fn is_capture_search_needed(&self, slots_len: usize) -> bool {
662        slots_len > self.nfa.group_info().implicit_slot_len()
663    }
664}
665
666impl Strategy for Core {
667    #[cfg_attr(feature = "perf-inline", inline(always))]
668    fn group_info(&self) -> &GroupInfo {
669        self.nfa.group_info()
670    }
671
672    #[cfg_attr(feature = "perf-inline", inline(always))]
673    fn create_cache(&self) -> Cache {
674        Cache {
675            capmatches: Captures::all(self.group_info().clone()),
676            pikevm: self.pikevm.create_cache(),
677            backtrack: self.backtrack.create_cache(),
678            onepass: self.onepass.create_cache(),
679            hybrid: self.hybrid.create_cache(),
680            revhybrid: wrappers::ReverseHybridCache::none(),
681        }
682    }
683
684    #[cfg_attr(feature = "perf-inline", inline(always))]
685    fn reset_cache(&self, cache: &mut Cache) {
686        cache.pikevm.reset(&self.pikevm);
687        cache.backtrack.reset(&self.backtrack);
688        cache.onepass.reset(&self.onepass);
689        cache.hybrid.reset(&self.hybrid);
690    }
691
692    fn is_accelerated(&self) -> bool {
693        self.pre.as_ref().map_or(false, |pre| pre.is_fast())
694    }
695
696    fn memory_usage(&self) -> usize {
697        self.info.memory_usage()
698            + self.pre.as_ref().map_or(0, |pre| pre.memory_usage())
699            + self.nfa.memory_usage()
700            + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage())
701            + self.onepass.memory_usage()
702            + self.dfa.memory_usage()
703    }
704
705    #[cfg_attr(feature = "perf-inline", inline(always))]
706    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
707        // We manually inline try_search_mayfail here because letting the
708        // compiler do it seems to produce pretty crappy codegen.
709        return if let Some(e) = self.dfa.get(input) {
710            trace!("using full DFA for full search at {:?}", input.get_span());
711            match e.try_search(input) {
712                Ok(x) => x,
713                Err(_err) => {
714                    trace!("full DFA search failed: {}", _err);
715                    self.search_nofail(cache, input)
716                }
717            }
718        } else if let Some(e) = self.hybrid.get(input) {
719            trace!("using lazy DFA for full search at {:?}", input.get_span());
720            match e.try_search(&mut cache.hybrid, input) {
721                Ok(x) => x,
722                Err(_err) => {
723                    trace!("lazy DFA search failed: {}", _err);
724                    self.search_nofail(cache, input)
725                }
726            }
727        } else {
728            self.search_nofail(cache, input)
729        };
730    }
731
732    #[cfg_attr(feature = "perf-inline", inline(always))]
733    fn search_half(
734        &self,
735        cache: &mut Cache,
736        input: &Input<'_>,
737    ) -> Option<HalfMatch> {
738        // The main difference with 'search' is that if we're using a DFA, we
739        // can use a single forward scan without needing to run the reverse
740        // DFA.
741        if let Some(e) = self.dfa.get(input) {
742            trace!("using full DFA for half search at {:?}", input.get_span());
743            match e.try_search_half_fwd(input) {
744                Ok(x) => x,
745                Err(_err) => {
746                    trace!("full DFA half search failed: {}", _err);
747                    self.search_half_nofail(cache, input)
748                }
749            }
750        } else if let Some(e) = self.hybrid.get(input) {
751            trace!("using lazy DFA for half search at {:?}", input.get_span());
752            match e.try_search_half_fwd(&mut cache.hybrid, input) {
753                Ok(x) => x,
754                Err(_err) => {
755                    trace!("lazy DFA half search failed: {}", _err);
756                    self.search_half_nofail(cache, input)
757                }
758            }
759        } else {
760            self.search_half_nofail(cache, input)
761        }
762    }
763
764    #[cfg_attr(feature = "perf-inline", inline(always))]
765    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
766        if let Some(e) = self.dfa.get(input) {
767            trace!(
768                "using full DFA for is-match search at {:?}",
769                input.get_span()
770            );
771            match e.try_search_half_fwd(input) {
772                Ok(x) => x.is_some(),
773                Err(_err) => {
774                    trace!("full DFA half search failed: {}", _err);
775                    self.is_match_nofail(cache, input)
776                }
777            }
778        } else if let Some(e) = self.hybrid.get(input) {
779            trace!(
780                "using lazy DFA for is-match search at {:?}",
781                input.get_span()
782            );
783            match e.try_search_half_fwd(&mut cache.hybrid, input) {
784                Ok(x) => x.is_some(),
785                Err(_err) => {
786                    trace!("lazy DFA half search failed: {}", _err);
787                    self.is_match_nofail(cache, input)
788                }
789            }
790        } else {
791            self.is_match_nofail(cache, input)
792        }
793    }
794
795    #[cfg_attr(feature = "perf-inline", inline(always))]
796    fn search_slots(
797        &self,
798        cache: &mut Cache,
799        input: &Input<'_>,
800        slots: &mut [Option<NonMaxUsize>],
801    ) -> Option<PatternID> {
802        // Even if the regex has explicit capture groups, if the caller didn't
803        // provide any explicit slots, then it doesn't make sense to try and do
804        // extra work to get offsets for those slots. Ideally the caller should
805        // realize this and not call this routine in the first place, but alas,
806        // we try to save the caller from themselves if they do.
807        if !self.is_capture_search_needed(slots.len()) {
808            trace!("asked for slots unnecessarily, trying fast path");
809            let m = self.search(cache, input)?;
810            copy_match_to_slots(m, slots);
811            return Some(m.pattern());
812        }
813        // If the onepass DFA is available for this search (which only happens
814        // when it's anchored), then skip running a fallible DFA. The onepass
815        // DFA isn't as fast as a full or lazy DFA, but it is typically quite
816        // a bit faster than the backtracker or the PikeVM. So it isn't as
817        // advantageous to try and do a full/lazy DFA scan first.
818        //
819        // We still theorize that it's better to do a full/lazy DFA scan, even
820        // when it's anchored, because it's usually much faster and permits us
821        // to say "no match" much more quickly. This does hurt the case of,
822        // say, parsing each line in a log file into capture groups, because
823        // in that case, the line always matches. So the lazy DFA scan is
824        // usually just wasted work. But, the lazy DFA is usually quite fast
825        // and doesn't cost too much here.
826        if self.onepass.get(&input).is_some() {
827            return self.search_slots_nofail(cache, &input, slots);
828        }
829        let m = match self.try_search_mayfail(cache, input) {
830            Some(Ok(Some(m))) => m,
831            Some(Ok(None)) => return None,
832            Some(Err(_err)) => {
833                trace!("fast capture search failed: {}", _err);
834                return self.search_slots_nofail(cache, input, slots);
835            }
836            None => {
837                return self.search_slots_nofail(cache, input, slots);
838            }
839        };
840        // At this point, now that we've found the bounds of the
841        // match, we need to re-run something that can resolve
842        // capturing groups. But we only need to run on it on the
843        // match bounds and not the entire haystack.
844        trace!(
845            "match found at {}..{} in capture search, \
846		  	 using another engine to find captures",
847            m.start(),
848            m.end(),
849        );
850        let input = input
851            .clone()
852            .span(m.start()..m.end())
853            .anchored(Anchored::Pattern(m.pattern()));
854        Some(
855            self.search_slots_nofail(cache, &input, slots)
856                .expect("should find a match"),
857        )
858    }
859
860    #[cfg_attr(feature = "perf-inline", inline(always))]
861    fn which_overlapping_matches(
862        &self,
863        cache: &mut Cache,
864        input: &Input<'_>,
865        patset: &mut PatternSet,
866    ) {
867        if let Some(e) = self.dfa.get(input) {
868            trace!(
869                "using full DFA for overlapping search at {:?}",
870                input.get_span()
871            );
872            let _err = match e.try_which_overlapping_matches(input, patset) {
873                Ok(()) => return,
874                Err(err) => err,
875            };
876            trace!("fast overlapping search failed: {}", _err);
877        } else if let Some(e) = self.hybrid.get(input) {
878            trace!(
879                "using lazy DFA for overlapping search at {:?}",
880                input.get_span()
881            );
882            let _err = match e.try_which_overlapping_matches(
883                &mut cache.hybrid,
884                input,
885                patset,
886            ) {
887                Ok(()) => {
888                    return;
889                }
890                Err(err) => err,
891            };
892            trace!("fast overlapping search failed: {}", _err);
893        }
894        trace!(
895            "using PikeVM for overlapping search at {:?}",
896            input.get_span()
897        );
898        let e = self.pikevm.get();
899        e.which_overlapping_matches(&mut cache.pikevm, input, patset)
900    }
901}
902
903#[derive(Debug)]
904struct ReverseAnchored {
905    core: Core,
906}
907
908impl ReverseAnchored {
909    fn new(core: Core) -> Result<ReverseAnchored, Core> {
910        if !core.info.is_always_anchored_end() {
911            debug!(
912                "skipping reverse anchored optimization because \
913				 the regex is not always anchored at the end"
914            );
915            return Err(core);
916        }
917        // Note that the caller can still request an anchored search even when
918        // the regex isn't anchored at the start. We detect that case in the
919        // search routines below and just fallback to the core engine. This
920        // is fine because both searches are anchored. It's just a matter of
921        // picking one. Falling back to the core engine is a little simpler,
922        // since if we used the reverse anchored approach, we'd have to add an
923        // extra check to ensure the match reported starts at the place where
924        // the caller requested the search to start.
925        if core.info.is_always_anchored_start() {
926            debug!(
927                "skipping reverse anchored optimization because \
928				 the regex is also anchored at the start"
929            );
930            return Err(core);
931        }
932        // Only DFAs can do reverse searches (currently), so we need one of
933        // them in order to do this optimization. It's possible (although
934        // pretty unlikely) that we have neither and need to give up.
935        if !core.hybrid.is_some() && !core.dfa.is_some() {
936            debug!(
937                "skipping reverse anchored optimization because \
938				 we don't have a lazy DFA or a full DFA"
939            );
940            return Err(core);
941        }
942        Ok(ReverseAnchored { core })
943    }
944
945    #[cfg_attr(feature = "perf-inline", inline(always))]
946    fn try_search_half_anchored_rev(
947        &self,
948        cache: &mut Cache,
949        input: &Input<'_>,
950    ) -> Result<Option<HalfMatch>, RetryFailError> {
951        // We of course always want an anchored search. In theory, the
952        // underlying regex engines should automatically enable anchored
953        // searches since the regex is itself anchored, but this more clearly
954        // expresses intent and is always correct.
955        let input = input.clone().anchored(Anchored::Yes);
956        if let Some(e) = self.core.dfa.get(&input) {
957            trace!(
958                "using full DFA for reverse anchored search at {:?}",
959                input.get_span()
960            );
961            e.try_search_half_rev(&input)
962        } else if let Some(e) = self.core.hybrid.get(&input) {
963            trace!(
964                "using lazy DFA for reverse anchored search at {:?}",
965                input.get_span()
966            );
967            e.try_search_half_rev(&mut cache.hybrid, &input)
968        } else {
969            unreachable!("ReverseAnchored always has a DFA")
970        }
971    }
972}
973
974// Note that in this impl, we don't check that 'input.end() ==
975// input.haystack().len()'. In particular, when that condition is false, a
976// match is always impossible because we know that the regex is always anchored
977// at the end (or else 'ReverseAnchored' won't be built). We don't check that
978// here because the 'Regex' wrapper actually does that for us in all cases.
979// Thus, in this impl, we can actually assume that the end position in 'input'
980// is equivalent to the length of the haystack.
981impl Strategy for ReverseAnchored {
982    #[cfg_attr(feature = "perf-inline", inline(always))]
983    fn group_info(&self) -> &GroupInfo {
984        self.core.group_info()
985    }
986
987    #[cfg_attr(feature = "perf-inline", inline(always))]
988    fn create_cache(&self) -> Cache {
989        self.core.create_cache()
990    }
991
992    #[cfg_attr(feature = "perf-inline", inline(always))]
993    fn reset_cache(&self, cache: &mut Cache) {
994        self.core.reset_cache(cache);
995    }
996
997    fn is_accelerated(&self) -> bool {
998        // Since this is anchored at the end, a reverse anchored search is
999        // almost certainly guaranteed to result in a much faster search than
1000        // a standard forward search.
1001        true
1002    }
1003
1004    fn memory_usage(&self) -> usize {
1005        self.core.memory_usage()
1006    }
1007
1008    #[cfg_attr(feature = "perf-inline", inline(always))]
1009    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1010        if input.get_anchored().is_anchored() {
1011            return self.core.search(cache, input);
1012        }
1013        match self.try_search_half_anchored_rev(cache, input) {
1014            Err(_err) => {
1015                trace!("fast reverse anchored search failed: {}", _err);
1016                self.core.search_nofail(cache, input)
1017            }
1018            Ok(None) => None,
1019            Ok(Some(hm)) => {
1020                Some(Match::new(hm.pattern(), hm.offset()..input.end()))
1021            }
1022        }
1023    }
1024
1025    #[cfg_attr(feature = "perf-inline", inline(always))]
1026    fn search_half(
1027        &self,
1028        cache: &mut Cache,
1029        input: &Input<'_>,
1030    ) -> Option<HalfMatch> {
1031        if input.get_anchored().is_anchored() {
1032            return self.core.search_half(cache, input);
1033        }
1034        match self.try_search_half_anchored_rev(cache, input) {
1035            Err(_err) => {
1036                trace!("fast reverse anchored search failed: {}", _err);
1037                self.core.search_half_nofail(cache, input)
1038            }
1039            Ok(None) => None,
1040            Ok(Some(hm)) => {
1041                // Careful here! 'try_search_half' is a *forward* search that
1042                // only cares about the *end* position of a match. But
1043                // 'hm.offset()' is actually the start of the match. So we
1044                // actually just throw that away here and, since we know we
1045                // have a match, return the only possible position at which a
1046                // match can occur: input.end().
1047                Some(HalfMatch::new(hm.pattern(), input.end()))
1048            }
1049        }
1050    }
1051
1052    #[cfg_attr(feature = "perf-inline", inline(always))]
1053    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1054        if input.get_anchored().is_anchored() {
1055            return self.core.is_match(cache, input);
1056        }
1057        match self.try_search_half_anchored_rev(cache, input) {
1058            Err(_err) => {
1059                trace!("fast reverse anchored search failed: {}", _err);
1060                self.core.is_match_nofail(cache, input)
1061            }
1062            Ok(None) => false,
1063            Ok(Some(_)) => true,
1064        }
1065    }
1066
1067    #[cfg_attr(feature = "perf-inline", inline(always))]
1068    fn search_slots(
1069        &self,
1070        cache: &mut Cache,
1071        input: &Input<'_>,
1072        slots: &mut [Option<NonMaxUsize>],
1073    ) -> Option<PatternID> {
1074        if input.get_anchored().is_anchored() {
1075            return self.core.search_slots(cache, input, slots);
1076        }
1077        match self.try_search_half_anchored_rev(cache, input) {
1078            Err(_err) => {
1079                trace!("fast reverse anchored search failed: {}", _err);
1080                self.core.search_slots_nofail(cache, input, slots)
1081            }
1082            Ok(None) => None,
1083            Ok(Some(hm)) => {
1084                if !self.core.is_capture_search_needed(slots.len()) {
1085                    trace!("asked for slots unnecessarily, skipping captures");
1086                    let m = Match::new(hm.pattern(), hm.offset()..input.end());
1087                    copy_match_to_slots(m, slots);
1088                    return Some(m.pattern());
1089                }
1090                let start = hm.offset();
1091                let input = input
1092                    .clone()
1093                    .span(start..input.end())
1094                    .anchored(Anchored::Pattern(hm.pattern()));
1095                self.core.search_slots_nofail(cache, &input, slots)
1096            }
1097        }
1098    }
1099
1100    #[cfg_attr(feature = "perf-inline", inline(always))]
1101    fn which_overlapping_matches(
1102        &self,
1103        cache: &mut Cache,
1104        input: &Input<'_>,
1105        patset: &mut PatternSet,
1106    ) {
1107        // It seems like this could probably benefit from a reverse anchored
1108        // optimization, perhaps by doing an overlapping reverse search (which
1109        // the DFAs do support). I haven't given it much thought though, and
1110        // I'm currently focus more on the single pattern case.
1111        self.core.which_overlapping_matches(cache, input, patset)
1112    }
1113}
1114
1115#[derive(Debug)]
1116struct ReverseSuffix {
1117    core: Core,
1118    pre: Prefilter,
1119}
1120
1121impl ReverseSuffix {
1122    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core> {
1123        if !core.info.config().get_auto_prefilter() {
1124            debug!(
1125                "skipping reverse suffix optimization because \
1126                 automatic prefilters are disabled"
1127            );
1128            return Err(core);
1129        }
1130        // Like the reverse inner optimization, we don't do this for regexes
1131        // that are always anchored. It could lead to scanning too much, but
1132        // could say "no match" much more quickly than running the regex
1133        // engine if the initial literal scan doesn't match. With that said,
1134        // the reverse suffix optimization has lower overhead, since it only
1135        // requires a reverse scan after a literal match to confirm or reject
1136        // the match. (Although, in the case of confirmation, it then needs to
1137        // do another forward scan to find the end position.)
1138        //
1139        // Note that the caller can still request an anchored search even
1140        // when the regex isn't anchored. We detect that case in the search
1141        // routines below and just fallback to the core engine. Currently this
1142        // optimization assumes all searches are unanchored, so if we do want
1143        // to enable this optimization for anchored searches, it will need a
1144        // little work to support it.
1145        if core.info.is_always_anchored_start() {
1146            debug!(
1147                "skipping reverse suffix optimization because \
1148				 the regex is always anchored at the start",
1149            );
1150            return Err(core);
1151        }
1152        // Only DFAs can do reverse searches (currently), so we need one of
1153        // them in order to do this optimization. It's possible (although
1154        // pretty unlikely) that we have neither and need to give up.
1155        if !core.hybrid.is_some() && !core.dfa.is_some() {
1156            debug!(
1157                "skipping reverse suffix optimization because \
1158				 we don't have a lazy DFA or a full DFA"
1159            );
1160            return Err(core);
1161        }
1162        if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1163            debug!(
1164                "skipping reverse suffix optimization because \
1165				 we already have a prefilter that we think is fast"
1166            );
1167            return Err(core);
1168        }
1169        let kind = core.info.config().get_match_kind();
1170        let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171        let lcs = match suffixes.longest_common_suffix() {
1172            None => {
1173                debug!(
1174                    "skipping reverse suffix optimization because \
1175                     a longest common suffix could not be found",
1176                );
1177                return Err(core);
1178            }
1179            Some(lcs) if lcs.is_empty() => {
1180                debug!(
1181                    "skipping reverse suffix optimization because \
1182                     the longest common suffix is the empty string",
1183                );
1184                return Err(core);
1185            }
1186            Some(lcs) => lcs,
1187        };
1188        let pre = match Prefilter::new(kind, &[lcs]) {
1189            Some(pre) => pre,
1190            None => {
1191                debug!(
1192                    "skipping reverse suffix optimization because \
1193                     a prefilter could not be constructed from the \
1194                     longest common suffix",
1195                );
1196                return Err(core);
1197            }
1198        };
1199        if !pre.is_fast() {
1200            debug!(
1201                "skipping reverse suffix optimization because \
1202				 while we have a suffix prefilter, it is not \
1203				 believed to be 'fast'"
1204            );
1205            return Err(core);
1206        }
1207        Ok(ReverseSuffix { core, pre })
1208    }
1209
1210    #[cfg_attr(feature = "perf-inline", inline(always))]
1211    fn try_search_half_start(
1212        &self,
1213        cache: &mut Cache,
1214        input: &Input<'_>,
1215    ) -> Result<Option<HalfMatch>, RetryError> {
1216        let mut span = input.get_span();
1217        let mut min_start = 0;
1218        loop {
1219            let litmatch = match self.pre.find(input.haystack(), span) {
1220                None => return Ok(None),
1221                Some(span) => span,
1222            };
1223            trace!("reverse suffix scan found suffix match at {:?}", litmatch);
1224            let revinput = input
1225                .clone()
1226                .anchored(Anchored::Yes)
1227                .span(input.start()..litmatch.end);
1228            match self
1229                .try_search_half_rev_limited(cache, &revinput, min_start)?
1230            {
1231                None => {
1232                    if span.start >= span.end {
1233                        break;
1234                    }
1235                    span.start = litmatch.start.checked_add(1).unwrap();
1236                }
1237                Some(hm) => return Ok(Some(hm)),
1238            }
1239            min_start = litmatch.end;
1240        }
1241        Ok(None)
1242    }
1243
1244    #[cfg_attr(feature = "perf-inline", inline(always))]
1245    fn try_search_half_fwd(
1246        &self,
1247        cache: &mut Cache,
1248        input: &Input<'_>,
1249    ) -> Result<Option<HalfMatch>, RetryFailError> {
1250        if let Some(e) = self.core.dfa.get(&input) {
1251            trace!(
1252                "using full DFA for forward reverse suffix search at {:?}",
1253                input.get_span()
1254            );
1255            e.try_search_half_fwd(&input)
1256        } else if let Some(e) = self.core.hybrid.get(&input) {
1257            trace!(
1258                "using lazy DFA for forward reverse suffix search at {:?}",
1259                input.get_span()
1260            );
1261            e.try_search_half_fwd(&mut cache.hybrid, &input)
1262        } else {
1263            unreachable!("ReverseSuffix always has a DFA")
1264        }
1265    }
1266
1267    #[cfg_attr(feature = "perf-inline", inline(always))]
1268    fn try_search_half_rev_limited(
1269        &self,
1270        cache: &mut Cache,
1271        input: &Input<'_>,
1272        min_start: usize,
1273    ) -> Result<Option<HalfMatch>, RetryError> {
1274        if let Some(e) = self.core.dfa.get(&input) {
1275            trace!(
1276                "using full DFA for reverse suffix search at {:?}, \
1277                 but will be stopped at {} to avoid quadratic behavior",
1278                input.get_span(),
1279                min_start,
1280            );
1281            e.try_search_half_rev_limited(&input, min_start)
1282        } else if let Some(e) = self.core.hybrid.get(&input) {
1283            trace!(
1284                "using lazy DFA for reverse suffix search at {:?}, \
1285                 but will be stopped at {} to avoid quadratic behavior",
1286                input.get_span(),
1287                min_start,
1288            );
1289            e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start)
1290        } else {
1291            unreachable!("ReverseSuffix always has a DFA")
1292        }
1293    }
1294}
1295
1296impl Strategy for ReverseSuffix {
1297    #[cfg_attr(feature = "perf-inline", inline(always))]
1298    fn group_info(&self) -> &GroupInfo {
1299        self.core.group_info()
1300    }
1301
1302    #[cfg_attr(feature = "perf-inline", inline(always))]
1303    fn create_cache(&self) -> Cache {
1304        self.core.create_cache()
1305    }
1306
1307    #[cfg_attr(feature = "perf-inline", inline(always))]
1308    fn reset_cache(&self, cache: &mut Cache) {
1309        self.core.reset_cache(cache);
1310    }
1311
1312    fn is_accelerated(&self) -> bool {
1313        self.pre.is_fast()
1314    }
1315
1316    fn memory_usage(&self) -> usize {
1317        self.core.memory_usage() + self.pre.memory_usage()
1318    }
1319
1320    #[cfg_attr(feature = "perf-inline", inline(always))]
1321    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1322        if input.get_anchored().is_anchored() {
1323            return self.core.search(cache, input);
1324        }
1325        match self.try_search_half_start(cache, input) {
1326            Err(RetryError::Quadratic(_err)) => {
1327                trace!("reverse suffix optimization failed: {}", _err);
1328                self.core.search(cache, input)
1329            }
1330            Err(RetryError::Fail(_err)) => {
1331                trace!("reverse suffix reverse fast search failed: {}", _err);
1332                self.core.search_nofail(cache, input)
1333            }
1334            Ok(None) => None,
1335            Ok(Some(hm_start)) => {
1336                let fwdinput = input
1337                    .clone()
1338                    .anchored(Anchored::Pattern(hm_start.pattern()))
1339                    .span(hm_start.offset()..input.end());
1340                match self.try_search_half_fwd(cache, &fwdinput) {
1341                    Err(_err) => {
1342                        trace!(
1343                            "reverse suffix forward fast search failed: {}",
1344                            _err
1345                        );
1346                        self.core.search_nofail(cache, input)
1347                    }
1348                    Ok(None) => {
1349                        unreachable!(
1350                            "suffix match plus reverse match implies \
1351						     there must be a match",
1352                        )
1353                    }
1354                    Ok(Some(hm_end)) => Some(Match::new(
1355                        hm_start.pattern(),
1356                        hm_start.offset()..hm_end.offset(),
1357                    )),
1358                }
1359            }
1360        }
1361    }
1362
1363    #[cfg_attr(feature = "perf-inline", inline(always))]
1364    fn search_half(
1365        &self,
1366        cache: &mut Cache,
1367        input: &Input<'_>,
1368    ) -> Option<HalfMatch> {
1369        if input.get_anchored().is_anchored() {
1370            return self.core.search_half(cache, input);
1371        }
1372        match self.try_search_half_start(cache, input) {
1373            Err(RetryError::Quadratic(_err)) => {
1374                trace!("reverse suffix half optimization failed: {}", _err);
1375                self.core.search_half(cache, input)
1376            }
1377            Err(RetryError::Fail(_err)) => {
1378                trace!(
1379                    "reverse suffix reverse fast half search failed: {}",
1380                    _err
1381                );
1382                self.core.search_half_nofail(cache, input)
1383            }
1384            Ok(None) => None,
1385            Ok(Some(hm_start)) => {
1386                // This is a bit subtle. It is tempting to just stop searching
1387                // at this point and return a half-match with an offset
1388                // corresponding to where the suffix was found. But the suffix
1389                // match does not necessarily correspond to the end of the
1390                // proper leftmost-first match. Consider /[a-z]+ing/ against
1391                // 'tingling'. The first suffix match is the first 'ing', and
1392                // the /[a-z]+/ matches the 't'. So if we stopped here, then
1393                // we'd report 'ting' as the match. But 'tingling' is the
1394                // correct match because of greediness.
1395                let fwdinput = input
1396                    .clone()
1397                    .anchored(Anchored::Pattern(hm_start.pattern()))
1398                    .span(hm_start.offset()..input.end());
1399                match self.try_search_half_fwd(cache, &fwdinput) {
1400                    Err(_err) => {
1401                        trace!(
1402                            "reverse suffix forward fast search failed: {}",
1403                            _err
1404                        );
1405                        self.core.search_half_nofail(cache, input)
1406                    }
1407                    Ok(None) => {
1408                        unreachable!(
1409                            "suffix match plus reverse match implies \
1410						     there must be a match",
1411                        )
1412                    }
1413                    Ok(Some(hm_end)) => Some(hm_end),
1414                }
1415            }
1416        }
1417    }
1418
1419    #[cfg_attr(feature = "perf-inline", inline(always))]
1420    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1421        if input.get_anchored().is_anchored() {
1422            return self.core.is_match(cache, input);
1423        }
1424        match self.try_search_half_start(cache, input) {
1425            Err(RetryError::Quadratic(_err)) => {
1426                trace!("reverse suffix half optimization failed: {}", _err);
1427                self.core.is_match_nofail(cache, input)
1428            }
1429            Err(RetryError::Fail(_err)) => {
1430                trace!(
1431                    "reverse suffix reverse fast half search failed: {}",
1432                    _err
1433                );
1434                self.core.is_match_nofail(cache, input)
1435            }
1436            Ok(None) => false,
1437            Ok(Some(_)) => true,
1438        }
1439    }
1440
1441    #[cfg_attr(feature = "perf-inline", inline(always))]
1442    fn search_slots(
1443        &self,
1444        cache: &mut Cache,
1445        input: &Input<'_>,
1446        slots: &mut [Option<NonMaxUsize>],
1447    ) -> Option<PatternID> {
1448        if input.get_anchored().is_anchored() {
1449            return self.core.search_slots(cache, input, slots);
1450        }
1451        if !self.core.is_capture_search_needed(slots.len()) {
1452            trace!("asked for slots unnecessarily, trying fast path");
1453            let m = self.search(cache, input)?;
1454            copy_match_to_slots(m, slots);
1455            return Some(m.pattern());
1456        }
1457        let hm_start = match self.try_search_half_start(cache, input) {
1458            Err(RetryError::Quadratic(_err)) => {
1459                trace!(
1460                    "reverse suffix captures optimization failed: {}",
1461                    _err
1462                );
1463                return self.core.search_slots(cache, input, slots);
1464            }
1465            Err(RetryError::Fail(_err)) => {
1466                trace!(
1467                    "reverse suffix reverse fast captures search failed: {}",
1468                    _err
1469                );
1470                return self.core.search_slots_nofail(cache, input, slots);
1471            }
1472            Ok(None) => return None,
1473            Ok(Some(hm_start)) => hm_start,
1474        };
1475        trace!(
1476            "match found at {}..{} in capture search, \
1477		  	 using another engine to find captures",
1478            hm_start.offset(),
1479            input.end(),
1480        );
1481        let start = hm_start.offset();
1482        let input = input
1483            .clone()
1484            .span(start..input.end())
1485            .anchored(Anchored::Pattern(hm_start.pattern()));
1486        self.core.search_slots_nofail(cache, &input, slots)
1487    }
1488
1489    #[cfg_attr(feature = "perf-inline", inline(always))]
1490    fn which_overlapping_matches(
1491        &self,
1492        cache: &mut Cache,
1493        input: &Input<'_>,
1494        patset: &mut PatternSet,
1495    ) {
1496        self.core.which_overlapping_matches(cache, input, patset)
1497    }
1498}
1499
1500#[derive(Debug)]
1501struct ReverseInner {
1502    core: Core,
1503    preinner: Prefilter,
1504    nfarev: NFA,
1505    hybrid: wrappers::ReverseHybrid,
1506    dfa: wrappers::ReverseDFA,
1507}
1508
1509impl ReverseInner {
1510    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core> {
1511        if !core.info.config().get_auto_prefilter() {
1512            debug!(
1513                "skipping reverse inner optimization because \
1514                 automatic prefilters are disabled"
1515            );
1516            return Err(core);
1517        }
1518        // Currently we hard-code the assumption of leftmost-first match
1519        // semantics. This isn't a huge deal because 'all' semantics tend to
1520        // only be used for forward overlapping searches with multiple regexes,
1521        // and this optimization only supports a single pattern at the moment.
1522        if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
1523            debug!(
1524                "skipping reverse inner optimization because \
1525				 match kind is {:?} but this only supports leftmost-first",
1526                core.info.config().get_match_kind(),
1527            );
1528            return Err(core);
1529        }
1530        // It's likely that a reverse inner scan has too much overhead for it
1531        // to be worth it when the regex is anchored at the start. It is
1532        // possible for it to be quite a bit faster if the initial literal
1533        // scan fails to detect a match, in which case, we can say "no match"
1534        // very quickly. But this could be undesirable, e.g., scanning too far
1535        // or when the literal scan matches. If it matches, then confirming the
1536        // match requires a reverse scan followed by a forward scan to confirm
1537        // or reject, which is a fair bit of work.
1538        //
1539        // Note that the caller can still request an anchored search even
1540        // when the regex isn't anchored. We detect that case in the search
1541        // routines below and just fallback to the core engine. Currently this
1542        // optimization assumes all searches are unanchored, so if we do want
1543        // to enable this optimization for anchored searches, it will need a
1544        // little work to support it.
1545        if core.info.is_always_anchored_start() {
1546            debug!(
1547                "skipping reverse inner optimization because \
1548				 the regex is always anchored at the start",
1549            );
1550            return Err(core);
1551        }
1552        // Only DFAs can do reverse searches (currently), so we need one of
1553        // them in order to do this optimization. It's possible (although
1554        // pretty unlikely) that we have neither and need to give up.
1555        if !core.hybrid.is_some() && !core.dfa.is_some() {
1556            debug!(
1557                "skipping reverse inner optimization because \
1558				 we don't have a lazy DFA or a full DFA"
1559            );
1560            return Err(core);
1561        }
1562        if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1563            debug!(
1564                "skipping reverse inner optimization because \
1565				 we already have a prefilter that we think is fast"
1566            );
1567            return Err(core);
1568        } else if core.pre.is_some() {
1569            debug!(
1570                "core engine has a prefix prefilter, but it is \
1571                 probably not fast, so continuing with attempt to \
1572                 use reverse inner prefilter"
1573            );
1574        }
1575        let (concat_prefix, preinner) = match reverse_inner::extract(hirs) {
1576            Some(x) => x,
1577            // N.B. the 'extract' function emits debug messages explaining
1578            // why we bailed out here.
1579            None => return Err(core),
1580        };
1581        debug!("building reverse NFA for prefix before inner literal");
1582        let mut lookm = LookMatcher::new();
1583        lookm.set_line_terminator(core.info.config().get_line_terminator());
1584        let thompson_config = thompson::Config::new()
1585            .reverse(true)
1586            .utf8(core.info.config().get_utf8_empty())
1587            .nfa_size_limit(core.info.config().get_nfa_size_limit())
1588            .shrink(false)
1589            .which_captures(WhichCaptures::None)
1590            .look_matcher(lookm);
1591        let result = thompson::Compiler::new()
1592            .configure(thompson_config)
1593            .build_from_hir(&concat_prefix);
1594        let nfarev = match result {
1595            Ok(nfarev) => nfarev,
1596            Err(_err) => {
1597                debug!(
1598                    "skipping reverse inner optimization because the \
1599					 reverse NFA failed to build: {}",
1600                    _err,
1601                );
1602                return Err(core);
1603            }
1604        };
1605        debug!("building reverse DFA for prefix before inner literal");
1606        let dfa = if !core.info.config().get_dfa() {
1607            wrappers::ReverseDFA::none()
1608        } else {
1609            wrappers::ReverseDFA::new(&core.info, &nfarev)
1610        };
1611        let hybrid = if !core.info.config().get_hybrid() {
1612            wrappers::ReverseHybrid::none()
1613        } else if dfa.is_some() {
1614            debug!(
1615                "skipping lazy DFA for reverse inner optimization \
1616				 because we have a full DFA"
1617            );
1618            wrappers::ReverseHybrid::none()
1619        } else {
1620            wrappers::ReverseHybrid::new(&core.info, &nfarev)
1621        };
1622        Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa })
1623    }
1624
1625    #[cfg_attr(feature = "perf-inline", inline(always))]
1626    fn try_search_full(
1627        &self,
1628        cache: &mut Cache,
1629        input: &Input<'_>,
1630    ) -> Result<Option<Match>, RetryError> {
1631        let mut span = input.get_span();
1632        let mut min_match_start = 0;
1633        let mut min_pre_start = 0;
1634        loop {
1635            let litmatch = match self.preinner.find(input.haystack(), span) {
1636                None => return Ok(None),
1637                Some(span) => span,
1638            };
1639            if litmatch.start < min_pre_start {
1640                trace!(
1641                    "found inner prefilter match at {:?}, which starts \
1642					 before the end of the last forward scan at {}, \
1643					 quitting to avoid quadratic behavior",
1644                    litmatch,
1645                    min_pre_start,
1646                );
1647                return Err(RetryError::Quadratic(RetryQuadraticError::new()));
1648            }
1649            trace!("reverse inner scan found inner match at {:?}", litmatch);
1650            let revinput = input
1651                .clone()
1652                .anchored(Anchored::Yes)
1653                .span(input.start()..litmatch.start);
1654            // Note that in addition to the literal search above scanning past
1655            // our minimum start point, this routine can also return an error
1656            // as a result of detecting possible quadratic behavior if the
1657            // reverse scan goes past the minimum start point. That is, the
1658            // literal search might not, but the reverse regex search for the
1659            // prefix might!
1660            match self.try_search_half_rev_limited(
1661                cache,
1662                &revinput,
1663                min_match_start,
1664            )? {
1665                None => {
1666                    if span.start >= span.end {
1667                        break;
1668                    }
1669                    span.start = litmatch.start.checked_add(1).unwrap();
1670                }
1671                Some(hm_start) => {
1672                    let fwdinput = input
1673                        .clone()
1674                        .anchored(Anchored::Pattern(hm_start.pattern()))
1675                        .span(hm_start.offset()..input.end());
1676                    match self.try_search_half_fwd_stopat(cache, &fwdinput)? {
1677                        Err(stopat) => {
1678                            min_pre_start = stopat;
1679                            span.start =
1680                                litmatch.start.checked_add(1).unwrap();
1681                        }
1682                        Ok(hm_end) => {
1683                            return Ok(Some(Match::new(
1684                                hm_start.pattern(),
1685                                hm_start.offset()..hm_end.offset(),
1686                            )))
1687                        }
1688                    }
1689                }
1690            }
1691            min_match_start = litmatch.end;
1692        }
1693        Ok(None)
1694    }
1695
1696    #[cfg_attr(feature = "perf-inline", inline(always))]
1697    fn try_search_half_fwd_stopat(
1698        &self,
1699        cache: &mut Cache,
1700        input: &Input<'_>,
1701    ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
1702        if let Some(e) = self.core.dfa.get(&input) {
1703            trace!(
1704                "using full DFA for forward reverse inner search at {:?}",
1705                input.get_span()
1706            );
1707            e.try_search_half_fwd_stopat(&input)
1708        } else if let Some(e) = self.core.hybrid.get(&input) {
1709            trace!(
1710                "using lazy DFA for forward reverse inner search at {:?}",
1711                input.get_span()
1712            );
1713            e.try_search_half_fwd_stopat(&mut cache.hybrid, &input)
1714        } else {
1715            unreachable!("ReverseInner always has a DFA")
1716        }
1717    }
1718
1719    #[cfg_attr(feature = "perf-inline", inline(always))]
1720    fn try_search_half_rev_limited(
1721        &self,
1722        cache: &mut Cache,
1723        input: &Input<'_>,
1724        min_start: usize,
1725    ) -> Result<Option<HalfMatch>, RetryError> {
1726        if let Some(e) = self.dfa.get(&input) {
1727            trace!(
1728                "using full DFA for reverse inner search at {:?}, \
1729                 but will be stopped at {} to avoid quadratic behavior",
1730                input.get_span(),
1731                min_start,
1732            );
1733            e.try_search_half_rev_limited(&input, min_start)
1734        } else if let Some(e) = self.hybrid.get(&input) {
1735            trace!(
1736                "using lazy DFA for reverse inner search at {:?}, \
1737                 but will be stopped at {} to avoid quadratic behavior",
1738                input.get_span(),
1739                min_start,
1740            );
1741            e.try_search_half_rev_limited(
1742                &mut cache.revhybrid,
1743                &input,
1744                min_start,
1745            )
1746        } else {
1747            unreachable!("ReverseInner always has a DFA")
1748        }
1749    }
1750}
1751
1752impl Strategy for ReverseInner {
1753    #[cfg_attr(feature = "perf-inline", inline(always))]
1754    fn group_info(&self) -> &GroupInfo {
1755        self.core.group_info()
1756    }
1757
1758    #[cfg_attr(feature = "perf-inline", inline(always))]
1759    fn create_cache(&self) -> Cache {
1760        let mut cache = self.core.create_cache();
1761        cache.revhybrid = self.hybrid.create_cache();
1762        cache
1763    }
1764
1765    #[cfg_attr(feature = "perf-inline", inline(always))]
1766    fn reset_cache(&self, cache: &mut Cache) {
1767        self.core.reset_cache(cache);
1768        cache.revhybrid.reset(&self.hybrid);
1769    }
1770
1771    fn is_accelerated(&self) -> bool {
1772        self.preinner.is_fast()
1773    }
1774
1775    fn memory_usage(&self) -> usize {
1776        self.core.memory_usage()
1777            + self.preinner.memory_usage()
1778            + self.nfarev.memory_usage()
1779            + self.dfa.memory_usage()
1780    }
1781
1782    #[cfg_attr(feature = "perf-inline", inline(always))]
1783    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1784        if input.get_anchored().is_anchored() {
1785            return self.core.search(cache, input);
1786        }
1787        match self.try_search_full(cache, input) {
1788            Err(RetryError::Quadratic(_err)) => {
1789                trace!("reverse inner optimization failed: {}", _err);
1790                self.core.search(cache, input)
1791            }
1792            Err(RetryError::Fail(_err)) => {
1793                trace!("reverse inner fast search failed: {}", _err);
1794                self.core.search_nofail(cache, input)
1795            }
1796            Ok(matornot) => matornot,
1797        }
1798    }
1799
1800    #[cfg_attr(feature = "perf-inline", inline(always))]
1801    fn search_half(
1802        &self,
1803        cache: &mut Cache,
1804        input: &Input<'_>,
1805    ) -> Option<HalfMatch> {
1806        if input.get_anchored().is_anchored() {
1807            return self.core.search_half(cache, input);
1808        }
1809        match self.try_search_full(cache, input) {
1810            Err(RetryError::Quadratic(_err)) => {
1811                trace!("reverse inner half optimization failed: {}", _err);
1812                self.core.search_half(cache, input)
1813            }
1814            Err(RetryError::Fail(_err)) => {
1815                trace!("reverse inner fast half search failed: {}", _err);
1816                self.core.search_half_nofail(cache, input)
1817            }
1818            Ok(None) => None,
1819            Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())),
1820        }
1821    }
1822
1823    #[cfg_attr(feature = "perf-inline", inline(always))]
1824    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1825        if input.get_anchored().is_anchored() {
1826            return self.core.is_match(cache, input);
1827        }
1828        match self.try_search_full(cache, input) {
1829            Err(RetryError::Quadratic(_err)) => {
1830                trace!("reverse inner half optimization failed: {}", _err);
1831                self.core.is_match_nofail(cache, input)
1832            }
1833            Err(RetryError::Fail(_err)) => {
1834                trace!("reverse inner fast half search failed: {}", _err);
1835                self.core.is_match_nofail(cache, input)
1836            }
1837            Ok(None) => false,
1838            Ok(Some(_)) => true,
1839        }
1840    }
1841
1842    #[cfg_attr(feature = "perf-inline", inline(always))]
1843    fn search_slots(
1844        &self,
1845        cache: &mut Cache,
1846        input: &Input<'_>,
1847        slots: &mut [Option<NonMaxUsize>],
1848    ) -> Option<PatternID> {
1849        if input.get_anchored().is_anchored() {
1850            return self.core.search_slots(cache, input, slots);
1851        }
1852        if !self.core.is_capture_search_needed(slots.len()) {
1853            trace!("asked for slots unnecessarily, trying fast path");
1854            let m = self.search(cache, input)?;
1855            copy_match_to_slots(m, slots);
1856            return Some(m.pattern());
1857        }
1858        let m = match self.try_search_full(cache, input) {
1859            Err(RetryError::Quadratic(_err)) => {
1860                trace!("reverse inner captures optimization failed: {}", _err);
1861                return self.core.search_slots(cache, input, slots);
1862            }
1863            Err(RetryError::Fail(_err)) => {
1864                trace!("reverse inner fast captures search failed: {}", _err);
1865                return self.core.search_slots_nofail(cache, input, slots);
1866            }
1867            Ok(None) => return None,
1868            Ok(Some(m)) => m,
1869        };
1870        trace!(
1871            "match found at {}..{} in capture search, \
1872		  	 using another engine to find captures",
1873            m.start(),
1874            m.end(),
1875        );
1876        let input = input
1877            .clone()
1878            .span(m.start()..m.end())
1879            .anchored(Anchored::Pattern(m.pattern()));
1880        self.core.search_slots_nofail(cache, &input, slots)
1881    }
1882
1883    #[cfg_attr(feature = "perf-inline", inline(always))]
1884    fn which_overlapping_matches(
1885        &self,
1886        cache: &mut Cache,
1887        input: &Input<'_>,
1888        patset: &mut PatternSet,
1889    ) {
1890        self.core.which_overlapping_matches(cache, input, patset)
1891    }
1892}
1893
1894/// Copies the offsets in the given match to the corresponding positions in
1895/// `slots`.
1896///
1897/// In effect, this sets the slots corresponding to the implicit group for the
1898/// pattern in the given match. If the indices for the corresponding slots do
1899/// not exist, then no slots are set.
1900///
1901/// This is useful when the caller provides slots (or captures), but you use a
1902/// regex engine that doesn't operate on slots (like a lazy DFA). This function
1903/// lets you map the match you get back to the slots provided by the caller.
1904#[cfg_attr(feature = "perf-inline", inline(always))]
1905fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
1906    let slot_start = m.pattern().as_usize() * 2;
1907    let slot_end = slot_start + 1;
1908    if let Some(slot) = slots.get_mut(slot_start) {
1909        *slot = NonMaxUsize::new(m.start());
1910    }
1911    if let Some(slot) = slots.get_mut(slot_end) {
1912        *slot = NonMaxUsize::new(m.end());
1913    }
1914}