memchr/memmem/
searcher.rs

1use crate::arch::all::{
2    packedpair::{HeuristicFrequencyRank, Pair},
3    rabinkarp, twoway,
4};
5
6#[cfg(target_arch = "aarch64")]
7use crate::arch::aarch64::neon::packedpair as neon;
8#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
9use crate::arch::wasm32::simd128::packedpair as simd128;
10#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
11use crate::arch::x86_64::{
12    avx2::packedpair as avx2, sse2::packedpair as sse2,
13};
14
15/// A "meta" substring searcher.
16///
17/// To a first approximation, this chooses what it believes to be the "best"
18/// substring search implemnetation based on the needle at construction time.
19/// Then, every call to `find` will execute that particular implementation. To
20/// a second approximation, multiple substring search algorithms may be used,
21/// depending on the haystack. For example, for supremely short haystacks,
22/// Rabin-Karp is typically used.
23///
24/// See the documentation on `Prefilter` for an explanation of the dispatching
25/// mechanism. The quick summary is that an enum has too much overhead and
26/// we can't use dynamic dispatch via traits because we need to work in a
27/// core-only environment. (Dynamic dispatch works in core-only, but you
28/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
29/// requires `alloc`.) So instead, we use a union and an appropriately paired
30/// free function to read from the correct field on the union and execute the
31/// chosen substring search implementation.
32#[derive(Clone)]
33pub(crate) struct Searcher {
34    call: SearcherKindFn,
35    kind: SearcherKind,
36    rabinkarp: rabinkarp::Finder,
37}
38
39impl Searcher {
40    /// Creates a new "meta" substring searcher that attempts to choose the
41    /// best algorithm based on the needle, heuristics and what the current
42    /// target supports.
43    #[inline]
44    pub(crate) fn new<R: HeuristicFrequencyRank>(
45        prefilter: PrefilterConfig,
46        ranker: R,
47        needle: &[u8],
48    ) -> Searcher {
49        let rabinkarp = rabinkarp::Finder::new(needle);
50        if needle.len() <= 1 {
51            return if needle.is_empty() {
52                trace!("building empty substring searcher");
53                Searcher {
54                    call: searcher_kind_empty,
55                    kind: SearcherKind { empty: () },
56                    rabinkarp,
57                }
58            } else {
59                trace!("building one-byte substring searcher");
60                debug_assert_eq!(1, needle.len());
61                Searcher {
62                    call: searcher_kind_one_byte,
63                    kind: SearcherKind { one_byte: needle[0] },
64                    rabinkarp,
65                }
66            };
67        }
68        let pair = match Pair::with_ranker(needle, &ranker) {
69            Some(pair) => pair,
70            None => return Searcher::twoway(needle, rabinkarp, None),
71        };
72        debug_assert_ne!(
73            pair.index1(),
74            pair.index2(),
75            "pair offsets should not be equivalent"
76        );
77        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
78        {
79            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
80                if do_packed_search(needle) {
81                    trace!("building x86_64 AVX2 substring searcher");
82                    let kind = SearcherKind { avx2: pp };
83                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
84                } else if prefilter.is_none() {
85                    Searcher::twoway(needle, rabinkarp, None)
86                } else {
87                    let prestrat = Prefilter::avx2(pp, needle);
88                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
89                }
90            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
91                if do_packed_search(needle) {
92                    trace!("building x86_64 SSE2 substring searcher");
93                    let kind = SearcherKind { sse2: pp };
94                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
95                } else if prefilter.is_none() {
96                    Searcher::twoway(needle, rabinkarp, None)
97                } else {
98                    let prestrat = Prefilter::sse2(pp, needle);
99                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
100                }
101            } else if prefilter.is_none() {
102                Searcher::twoway(needle, rabinkarp, None)
103            } else {
104                // We're pretty unlikely to get to this point, but it is
105                // possible to be running on x86_64 without SSE2. Namely, it's
106                // really up to the OS whether it wants to support vector
107                // registers or not.
108                let prestrat = Prefilter::fallback(ranker, pair, needle);
109                Searcher::twoway(needle, rabinkarp, prestrat)
110            }
111        }
112        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
113        {
114            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
115                if do_packed_search(needle) {
116                    trace!("building wasm32 simd128 substring searcher");
117                    let kind = SearcherKind { simd128: pp };
118                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
119                } else if prefilter.is_none() {
120                    Searcher::twoway(needle, rabinkarp, None)
121                } else {
122                    let prestrat = Prefilter::simd128(pp, needle);
123                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
124                }
125            } else if prefilter.is_none() {
126                Searcher::twoway(needle, rabinkarp, None)
127            } else {
128                let prestrat = Prefilter::fallback(ranker, pair, needle);
129                Searcher::twoway(needle, rabinkarp, prestrat)
130            }
131        }
132        #[cfg(target_arch = "aarch64")]
133        {
134            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
135                if do_packed_search(needle) {
136                    trace!("building aarch64 neon substring searcher");
137                    let kind = SearcherKind { neon: pp };
138                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
139                } else if prefilter.is_none() {
140                    Searcher::twoway(needle, rabinkarp, None)
141                } else {
142                    let prestrat = Prefilter::neon(pp, needle);
143                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
144                }
145            } else if prefilter.is_none() {
146                Searcher::twoway(needle, rabinkarp, None)
147            } else {
148                let prestrat = Prefilter::fallback(ranker, pair, needle);
149                Searcher::twoway(needle, rabinkarp, prestrat)
150            }
151        }
152        #[cfg(not(any(
153            all(target_arch = "x86_64", target_feature = "sse2"),
154            all(target_arch = "wasm32", target_feature = "simd128"),
155            target_arch = "aarch64"
156        )))]
157        {
158            if prefilter.is_none() {
159                Searcher::twoway(needle, rabinkarp, None)
160            } else {
161                let prestrat = Prefilter::fallback(ranker, pair, needle);
162                Searcher::twoway(needle, rabinkarp, prestrat)
163            }
164        }
165    }
166
167    /// Creates a new searcher that always uses the Two-Way algorithm. This is
168    /// typically used when vector algorithms are unavailable or inappropriate.
169    /// (For example, when the needle is "too long.")
170    ///
171    /// If a prefilter is given, then the searcher returned will be accelerated
172    /// by the prefilter.
173    #[inline]
174    fn twoway(
175        needle: &[u8],
176        rabinkarp: rabinkarp::Finder,
177        prestrat: Option<Prefilter>,
178    ) -> Searcher {
179        let finder = twoway::Finder::new(needle);
180        match prestrat {
181            None => {
182                trace!("building scalar two-way substring searcher");
183                let kind = SearcherKind { two_way: finder };
184                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
185            }
186            Some(prestrat) => {
187                trace!(
188                    "building scalar two-way \
189                     substring searcher with a prefilter"
190                );
191                let two_way_with_prefilter =
192                    TwoWayWithPrefilter { finder, prestrat };
193                let kind = SearcherKind { two_way_with_prefilter };
194                Searcher {
195                    call: searcher_kind_two_way_with_prefilter,
196                    kind,
197                    rabinkarp,
198                }
199            }
200        }
201    }
202
203    /// Searches the given haystack for the given needle. The needle given
204    /// should be the same as the needle that this finder was initialized
205    /// with.
206    ///
207    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
208    /// seem to be enough in some cases.
209    #[inline(always)]
210    pub(crate) fn find(
211        &self,
212        prestate: &mut PrefilterState,
213        haystack: &[u8],
214        needle: &[u8],
215    ) -> Option<usize> {
216        if haystack.len() < needle.len() {
217            None
218        } else {
219            // SAFETY: By construction, we've ensured that the function
220            // in `self.call` is properly paired with the union used in
221            // `self.kind`.
222            unsafe { (self.call)(self, prestate, haystack, needle) }
223        }
224    }
225}
226
227impl core::fmt::Debug for Searcher {
228    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
229        f.debug_struct("Searcher")
230            .field("call", &"<searcher function>")
231            .field("kind", &"<searcher kind union>")
232            .field("rabinkarp", &self.rabinkarp)
233            .finish()
234    }
235}
236
237/// A union indicating one of several possible substring search implementations
238/// that are in active use.
239///
240/// This union should only be read by one of the functions prefixed with
241/// `searcher_kind_`. Namely, the correct function is meant to be paired with
242/// the union by the caller, such that the function always reads from the
243/// designated union field.
244#[derive(Clone, Copy)]
245union SearcherKind {
246    empty: (),
247    one_byte: u8,
248    two_way: twoway::Finder,
249    two_way_with_prefilter: TwoWayWithPrefilter,
250    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
251    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
252    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
253    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
254    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
255    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
256    #[cfg(target_arch = "aarch64")]
257    neon: crate::arch::aarch64::neon::packedpair::Finder,
258}
259
260/// A two-way substring searcher with a prefilter.
261#[derive(Copy, Clone, Debug)]
262struct TwoWayWithPrefilter {
263    finder: twoway::Finder,
264    prestrat: Prefilter,
265}
266
267/// The type of a substring search function.
268///
269/// # Safety
270///
271/// When using a function of this type, callers must ensure that the correct
272/// function is paired with the value populated in `SearcherKind` union.
273type SearcherKindFn = unsafe fn(
274    searcher: &Searcher,
275    prestate: &mut PrefilterState,
276    haystack: &[u8],
277    needle: &[u8],
278) -> Option<usize>;
279
280/// Reads from the `empty` field of `SearcherKind` to handle the case of
281/// searching for the empty needle. Works on all platforms.
282///
283/// # Safety
284///
285/// Callers must ensure that the `searcher.kind.empty` union field is set.
286unsafe fn searcher_kind_empty(
287    _searcher: &Searcher,
288    _prestate: &mut PrefilterState,
289    _haystack: &[u8],
290    _needle: &[u8],
291) -> Option<usize> {
292    Some(0)
293}
294
295/// Reads from the `one_byte` field of `SearcherKind` to handle the case of
296/// searching for a single byte needle. Works on all platforms.
297///
298/// # Safety
299///
300/// Callers must ensure that the `searcher.kind.one_byte` union field is set.
301unsafe fn searcher_kind_one_byte(
302    searcher: &Searcher,
303    _prestate: &mut PrefilterState,
304    haystack: &[u8],
305    _needle: &[u8],
306) -> Option<usize> {
307    let needle = searcher.kind.one_byte;
308    crate::memchr(needle, haystack)
309}
310
311/// Reads from the `two_way` field of `SearcherKind` to handle the case of
312/// searching for an arbitrary needle without prefilter acceleration. Works on
313/// all platforms.
314///
315/// # Safety
316///
317/// Callers must ensure that the `searcher.kind.two_way` union field is set.
318unsafe fn searcher_kind_two_way(
319    searcher: &Searcher,
320    _prestate: &mut PrefilterState,
321    haystack: &[u8],
322    needle: &[u8],
323) -> Option<usize> {
324    if rabinkarp::is_fast(haystack, needle) {
325        searcher.rabinkarp.find(haystack, needle)
326    } else {
327        searcher.kind.two_way.find(haystack, needle)
328    }
329}
330
331/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle
332/// the case of searching for an arbitrary needle with prefilter acceleration.
333/// Works on all platforms.
334///
335/// # Safety
336///
337/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union
338/// field is set.
339unsafe fn searcher_kind_two_way_with_prefilter(
340    searcher: &Searcher,
341    prestate: &mut PrefilterState,
342    haystack: &[u8],
343    needle: &[u8],
344) -> Option<usize> {
345    if rabinkarp::is_fast(haystack, needle) {
346        searcher.rabinkarp.find(haystack, needle)
347    } else {
348        let TwoWayWithPrefilter { ref finder, ref prestrat } =
349            searcher.kind.two_way_with_prefilter;
350        let pre = Pre { prestate, prestrat };
351        finder.find_with_prefilter(Some(pre), haystack, needle)
352    }
353}
354
355/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2
356/// vectorized substring search implementation.
357///
358/// # Safety
359///
360/// Callers must ensure that the `searcher.kind.sse2` union field is set.
361#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
362unsafe fn searcher_kind_sse2(
363    searcher: &Searcher,
364    _prestate: &mut PrefilterState,
365    haystack: &[u8],
366    needle: &[u8],
367) -> Option<usize> {
368    let finder = &searcher.kind.sse2;
369    if haystack.len() < finder.min_haystack_len() {
370        searcher.rabinkarp.find(haystack, needle)
371    } else {
372        finder.find(haystack, needle)
373    }
374}
375
376/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2
377/// vectorized substring search implementation.
378///
379/// # Safety
380///
381/// Callers must ensure that the `searcher.kind.avx2` union field is set.
382#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
383unsafe fn searcher_kind_avx2(
384    searcher: &Searcher,
385    _prestate: &mut PrefilterState,
386    haystack: &[u8],
387    needle: &[u8],
388) -> Option<usize> {
389    let finder = &searcher.kind.avx2;
390    if haystack.len() < finder.min_haystack_len() {
391        searcher.rabinkarp.find(haystack, needle)
392    } else {
393        finder.find(haystack, needle)
394    }
395}
396
397/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32
398/// simd128 vectorized substring search implementation.
399///
400/// # Safety
401///
402/// Callers must ensure that the `searcher.kind.simd128` union field is set.
403#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
404unsafe fn searcher_kind_simd128(
405    searcher: &Searcher,
406    _prestate: &mut PrefilterState,
407    haystack: &[u8],
408    needle: &[u8],
409) -> Option<usize> {
410    let finder = &searcher.kind.simd128;
411    if haystack.len() < finder.min_haystack_len() {
412        searcher.rabinkarp.find(haystack, needle)
413    } else {
414        finder.find(haystack, needle)
415    }
416}
417
418/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon
419/// vectorized substring search implementation.
420///
421/// # Safety
422///
423/// Callers must ensure that the `searcher.kind.neon` union field is set.
424#[cfg(target_arch = "aarch64")]
425unsafe fn searcher_kind_neon(
426    searcher: &Searcher,
427    _prestate: &mut PrefilterState,
428    haystack: &[u8],
429    needle: &[u8],
430) -> Option<usize> {
431    let finder = &searcher.kind.neon;
432    if haystack.len() < finder.min_haystack_len() {
433        searcher.rabinkarp.find(haystack, needle)
434    } else {
435        finder.find(haystack, needle)
436    }
437}
438
439/// A reverse substring searcher.
440#[derive(Clone, Debug)]
441pub(crate) struct SearcherRev {
442    kind: SearcherRevKind,
443    rabinkarp: rabinkarp::FinderRev,
444}
445
446/// The kind of the reverse searcher.
447///
448/// For the reverse case, we don't do any SIMD acceleration or prefilters.
449/// There is no specific technical reason why we don't, but rather don't do it
450/// because it's not clear it's worth the extra code to do so. If you have a
451/// use case for it, please file an issue.
452///
453/// We also don't do the union trick as we do with the forward case and
454/// prefilters. Basically for the same reason we don't have prefilters or
455/// vector algorithms for reverse searching: it's not clear it's worth doing.
456/// Please file an issue if you have a compelling use case for fast reverse
457/// substring search.
458#[derive(Clone, Debug)]
459enum SearcherRevKind {
460    Empty,
461    OneByte { needle: u8 },
462    TwoWay { finder: twoway::FinderRev },
463}
464
465impl SearcherRev {
466    /// Creates a new searcher for finding occurrences of the given needle in
467    /// reverse. That is, it reports the last (instead of the first) occurrence
468    /// of a needle in a haystack.
469    #[inline]
470    pub(crate) fn new(needle: &[u8]) -> SearcherRev {
471        let kind = if needle.len() <= 1 {
472            if needle.is_empty() {
473                trace!("building empty reverse substring searcher");
474                SearcherRevKind::Empty
475            } else {
476                trace!("building one-byte reverse substring searcher");
477                debug_assert_eq!(1, needle.len());
478                SearcherRevKind::OneByte { needle: needle[0] }
479            }
480        } else {
481            trace!("building scalar two-way reverse substring searcher");
482            let finder = twoway::FinderRev::new(needle);
483            SearcherRevKind::TwoWay { finder }
484        };
485        let rabinkarp = rabinkarp::FinderRev::new(needle);
486        SearcherRev { kind, rabinkarp }
487    }
488
489    /// Searches the given haystack for the last occurrence of the given
490    /// needle. The needle given should be the same as the needle that this
491    /// finder was initialized with.
492    #[inline]
493    pub(crate) fn rfind(
494        &self,
495        haystack: &[u8],
496        needle: &[u8],
497    ) -> Option<usize> {
498        if haystack.len() < needle.len() {
499            return None;
500        }
501        match self.kind {
502            SearcherRevKind::Empty => Some(haystack.len()),
503            SearcherRevKind::OneByte { needle } => {
504                crate::memrchr(needle, haystack)
505            }
506            SearcherRevKind::TwoWay { ref finder } => {
507                if rabinkarp::is_fast(haystack, needle) {
508                    self.rabinkarp.rfind(haystack, needle)
509                } else {
510                    finder.rfind(haystack, needle)
511                }
512            }
513        }
514    }
515}
516
517/// Prefilter controls whether heuristics are used to accelerate searching.
518///
519/// A prefilter refers to the idea of detecting candidate matches very quickly,
520/// and then confirming whether those candidates are full matches. This
521/// idea can be quite effective since it's often the case that looking for
522/// candidates can be a lot faster than running a complete substring search
523/// over the entire input. Namely, looking for candidates can be done with
524/// extremely fast vectorized code.
525///
526/// The downside of a prefilter is that it assumes false positives (which are
527/// candidates generated by a prefilter that aren't matches) are somewhat rare
528/// relative to the frequency of full matches. That is, if a lot of false
529/// positives are generated, then it's possible for search time to be worse
530/// than if the prefilter wasn't enabled in the first place.
531///
532/// Another downside of a prefilter is that it can result in highly variable
533/// performance, where some cases are extraordinarily fast and others aren't.
534/// Typically, variable performance isn't a problem, but it may be for your use
535/// case.
536///
537/// The use of prefilters in this implementation does use a heuristic to detect
538/// when a prefilter might not be carrying its weight, and will dynamically
539/// disable its use. Nevertheless, this configuration option gives callers
540/// the ability to disable prefilters if you have knowledge that they won't be
541/// useful.
542#[derive(Clone, Copy, Debug)]
543#[non_exhaustive]
544pub enum PrefilterConfig {
545    /// Never used a prefilter in substring search.
546    None,
547    /// Automatically detect whether a heuristic prefilter should be used. If
548    /// it is used, then heuristics will be used to dynamically disable the
549    /// prefilter if it is believed to not be carrying its weight.
550    Auto,
551}
552
553impl Default for PrefilterConfig {
554    fn default() -> PrefilterConfig {
555        PrefilterConfig::Auto
556    }
557}
558
559impl PrefilterConfig {
560    /// Returns true when this prefilter is set to the `None` variant.
561    fn is_none(&self) -> bool {
562        matches!(*self, PrefilterConfig::None)
563    }
564}
565
566/// The implementation of a prefilter.
567///
568/// This type encapsulates dispatch to one of several possible choices for a
569/// prefilter. Generally speaking, all prefilters have the same approximate
570/// algorithm: they choose a couple of bytes from the needle that are believed
571/// to be rare, use a fast vector algorithm to look for those bytes and return
572/// positions as candidates for some substring search algorithm (currently only
573/// Two-Way) to confirm as a match or not.
574///
575/// The differences between the algorithms are actually at the vector
576/// implementation level. Namely, we need different routines based on both
577/// which target architecture we're on and what CPU features are supported.
578///
579/// The straight-forwardly obvious approach here is to use an enum, and make
580/// `Prefilter::find` do case analysis to determine which algorithm was
581/// selected and invoke it. However, I've observed that this leads to poor
582/// codegen in some cases, especially in latency sensitive benchmarks. That is,
583/// this approach comes with overhead that I wasn't able to eliminate.
584///
585/// The second obvious approach is to use dynamic dispatch with traits. Doing
586/// that in this context where `Prefilter` owns the selection generally
587/// requires heap allocation, and this code is designed to run in core-only
588/// environments.
589///
590/// So we settle on using a union (that's `PrefilterKind`) and a function
591/// pointer (that's `PrefilterKindFn`). We select the right function pointer
592/// based on which field in the union we set, and that function in turn
593/// knows which field of the union to access. The downside of this approach
594/// is that it forces us to think about safety, but the upside is that
595/// there are some nice latency improvements to benchmarks. (Especially the
596/// `memmem/sliceslice/short` benchmark.)
597///
598/// In cases where we've selected a vector algorithm and the haystack given
599/// is too short, we fallback to the scalar version of `memchr` on the
600/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
601/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
602/// time.)
603#[derive(Clone, Copy)]
604struct Prefilter {
605    call: PrefilterKindFn,
606    kind: PrefilterKind,
607    rarest_byte: u8,
608    rarest_offset: u8,
609}
610
611impl Prefilter {
612    /// Return a "fallback" prefilter, but only if it is believed to be
613    /// effective.
614    #[inline]
615    fn fallback<R: HeuristicFrequencyRank>(
616        ranker: R,
617        pair: Pair,
618        needle: &[u8],
619    ) -> Option<Prefilter> {
620        /// The maximum frequency rank permitted for the fallback prefilter.
621        /// If the rarest byte in the needle has a frequency rank above this
622        /// value, then no prefilter is used if the fallback prefilter would
623        /// otherwise be selected.
624        const MAX_FALLBACK_RANK: u8 = 250;
625
626        trace!("building fallback prefilter");
627        let rarest_offset = pair.index1();
628        let rarest_byte = needle[usize::from(rarest_offset)];
629        let rarest_rank = ranker.rank(rarest_byte);
630        if rarest_rank > MAX_FALLBACK_RANK {
631            None
632        } else {
633            let finder = crate::arch::all::packedpair::Finder::with_pair(
634                needle,
635                pair.clone(),
636            )?;
637            let call = prefilter_kind_fallback;
638            let kind = PrefilterKind { fallback: finder };
639            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
640        }
641    }
642
643    /// Return a prefilter using a x86_64 SSE2 vector algorithm.
644    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
645    #[inline]
646    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
647        trace!("building x86_64 SSE2 prefilter");
648        let rarest_offset = finder.pair().index1();
649        let rarest_byte = needle[usize::from(rarest_offset)];
650        Prefilter {
651            call: prefilter_kind_sse2,
652            kind: PrefilterKind { sse2: finder },
653            rarest_byte,
654            rarest_offset,
655        }
656    }
657
658    /// Return a prefilter using a x86_64 AVX2 vector algorithm.
659    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
660    #[inline]
661    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
662        trace!("building x86_64 AVX2 prefilter");
663        let rarest_offset = finder.pair().index1();
664        let rarest_byte = needle[usize::from(rarest_offset)];
665        Prefilter {
666            call: prefilter_kind_avx2,
667            kind: PrefilterKind { avx2: finder },
668            rarest_byte,
669            rarest_offset,
670        }
671    }
672
673    /// Return a prefilter using a wasm32 simd128 vector algorithm.
674    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
675    #[inline]
676    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
677        trace!("building wasm32 simd128 prefilter");
678        let rarest_offset = finder.pair().index1();
679        let rarest_byte = needle[usize::from(rarest_offset)];
680        Prefilter {
681            call: prefilter_kind_simd128,
682            kind: PrefilterKind { simd128: finder },
683            rarest_byte,
684            rarest_offset,
685        }
686    }
687
688    /// Return a prefilter using a aarch64 neon vector algorithm.
689    #[cfg(target_arch = "aarch64")]
690    #[inline]
691    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
692        trace!("building aarch64 neon prefilter");
693        let rarest_offset = finder.pair().index1();
694        let rarest_byte = needle[usize::from(rarest_offset)];
695        Prefilter {
696            call: prefilter_kind_neon,
697            kind: PrefilterKind { neon: finder },
698            rarest_byte,
699            rarest_offset,
700        }
701    }
702
703    /// Return a *candidate* position for a match.
704    ///
705    /// When this returns an offset, it implies that a match could begin at
706    /// that offset, but it may not. That is, it is possible for a false
707    /// positive to be returned.
708    ///
709    /// When `None` is returned, then it is guaranteed that there are no
710    /// matches for the needle in the given haystack. That is, it is impossible
711    /// for a false negative to be returned.
712    ///
713    /// The purpose of this routine is to look for candidate matching positions
714    /// as quickly as possible before running a (likely) slower confirmation
715    /// step.
716    #[inline]
717    fn find(&self, haystack: &[u8]) -> Option<usize> {
718        // SAFETY: By construction, we've ensured that the function in
719        // `self.call` is properly paired with the union used in `self.kind`.
720        unsafe { (self.call)(self, haystack) }
721    }
722
723    /// A "simple" prefilter that just looks for the occurrence of the rarest
724    /// byte from the needle. This is generally only used for very small
725    /// haystacks.
726    #[inline]
727    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
728        // We don't use crate::memchr here because the haystack should be small
729        // enough that memchr won't be able to use vector routines anyway. So
730        // we just skip straight to the fallback implementation which is likely
731        // faster. (A byte-at-a-time loop is only used when the haystack is
732        // smaller than `size_of::<usize>()`.)
733        crate::arch::all::memchr::One::new(self.rarest_byte)
734            .find(haystack)
735            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
736    }
737}
738
739impl core::fmt::Debug for Prefilter {
740    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
741        f.debug_struct("Prefilter")
742            .field("call", &"<prefilter function>")
743            .field("kind", &"<prefilter kind union>")
744            .field("rarest_byte", &self.rarest_byte)
745            .field("rarest_offset", &self.rarest_offset)
746            .finish()
747    }
748}
749
750/// A union indicating one of several possible prefilters that are in active
751/// use.
752///
753/// This union should only be read by one of the functions prefixed with
754/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
755/// the union by the caller, such that the function always reads from the
756/// designated union field.
757#[derive(Clone, Copy)]
758union PrefilterKind {
759    fallback: crate::arch::all::packedpair::Finder,
760    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
761    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
762    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
763    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
764    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
765    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
766    #[cfg(target_arch = "aarch64")]
767    neon: crate::arch::aarch64::neon::packedpair::Finder,
768}
769
770/// The type of a prefilter function.
771///
772/// # Safety
773///
774/// When using a function of this type, callers must ensure that the correct
775/// function is paired with the value populated in `PrefilterKind` union.
776type PrefilterKindFn =
777    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;
778
779/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback
780/// prefilter. Works on all platforms.
781///
782/// # Safety
783///
784/// Callers must ensure that the `strat.kind.fallback` union field is set.
785unsafe fn prefilter_kind_fallback(
786    strat: &Prefilter,
787    haystack: &[u8],
788) -> Option<usize> {
789    strat.kind.fallback.find_prefilter(haystack)
790}
791
792/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2
793/// prefilter.
794///
795/// # Safety
796///
797/// Callers must ensure that the `strat.kind.sse2` union field is set.
798#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
799unsafe fn prefilter_kind_sse2(
800    strat: &Prefilter,
801    haystack: &[u8],
802) -> Option<usize> {
803    let finder = &strat.kind.sse2;
804    if haystack.len() < finder.min_haystack_len() {
805        strat.find_simple(haystack)
806    } else {
807        finder.find_prefilter(haystack)
808    }
809}
810
811/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2
812/// prefilter.
813///
814/// # Safety
815///
816/// Callers must ensure that the `strat.kind.avx2` union field is set.
817#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
818unsafe fn prefilter_kind_avx2(
819    strat: &Prefilter,
820    haystack: &[u8],
821) -> Option<usize> {
822    let finder = &strat.kind.avx2;
823    if haystack.len() < finder.min_haystack_len() {
824        strat.find_simple(haystack)
825    } else {
826        finder.find_prefilter(haystack)
827    }
828}
829
830/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
831/// simd128 prefilter.
832///
833/// # Safety
834///
835/// Callers must ensure that the `strat.kind.simd128` union field is set.
836#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
837unsafe fn prefilter_kind_simd128(
838    strat: &Prefilter,
839    haystack: &[u8],
840) -> Option<usize> {
841    let finder = &strat.kind.simd128;
842    if haystack.len() < finder.min_haystack_len() {
843        strat.find_simple(haystack)
844    } else {
845        finder.find_prefilter(haystack)
846    }
847}
848
849/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
850/// prefilter.
851///
852/// # Safety
853///
854/// Callers must ensure that the `strat.kind.neon` union field is set.
855#[cfg(target_arch = "aarch64")]
856unsafe fn prefilter_kind_neon(
857    strat: &Prefilter,
858    haystack: &[u8],
859) -> Option<usize> {
860    let finder = &strat.kind.neon;
861    if haystack.len() < finder.min_haystack_len() {
862        strat.find_simple(haystack)
863    } else {
864        finder.find_prefilter(haystack)
865    }
866}
867
868/// PrefilterState tracks state associated with the effectiveness of a
869/// prefilter. It is used to track how many bytes, on average, are skipped by
870/// the prefilter. If this average dips below a certain threshold over time,
871/// then the state renders the prefilter inert and stops using it.
872///
873/// A prefilter state should be created for each search. (Where creating an
874/// iterator is treated as a single search.) A prefilter state should only be
875/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert
876/// `PrefilterState`.
877#[derive(Clone, Copy, Debug)]
878pub(crate) struct PrefilterState {
879    /// The number of skips that has been executed. This is always 1 greater
880    /// than the actual number of skips. The special sentinel value of 0
881    /// indicates that the prefilter is inert. This is useful to avoid
882    /// additional checks to determine whether the prefilter is still
883    /// "effective." Once a prefilter becomes inert, it should no longer be
884    /// used (according to our heuristics).
885    skips: u32,
886    /// The total number of bytes that have been skipped.
887    skipped: u32,
888}
889
890impl PrefilterState {
891    /// The minimum number of skip attempts to try before considering whether
892    /// a prefilter is effective or not.
893    const MIN_SKIPS: u32 = 50;
894
895    /// The minimum amount of bytes that skipping must average.
896    ///
897    /// This value was chosen based on varying it and checking
898    /// the microbenchmarks. In particular, this can impact the
899    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
900    /// too low.
901    const MIN_SKIP_BYTES: u32 = 8;
902
903    /// Create a fresh prefilter state.
904    #[inline]
905    pub(crate) fn new() -> PrefilterState {
906        PrefilterState { skips: 1, skipped: 0 }
907    }
908
909    /// Update this state with the number of bytes skipped on the last
910    /// invocation of the prefilter.
911    #[inline]
912    fn update(&mut self, skipped: usize) {
913        self.skips = self.skips.saturating_add(1);
914        // We need to do this dance since it's technically possible for
915        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
916        // size of a prefilter state.)
917        self.skipped = match u32::try_from(skipped) {
918            Err(_) => core::u32::MAX,
919            Ok(skipped) => self.skipped.saturating_add(skipped),
920        };
921    }
922
923    /// Return true if and only if this state indicates that a prefilter is
924    /// still effective.
925    #[inline]
926    fn is_effective(&mut self) -> bool {
927        if self.is_inert() {
928            return false;
929        }
930        if self.skips() < PrefilterState::MIN_SKIPS {
931            return true;
932        }
933        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
934            return true;
935        }
936
937        // We're inert.
938        self.skips = 0;
939        false
940    }
941
942    /// Returns true if the prefilter this state represents should no longer
943    /// be used.
944    #[inline]
945    fn is_inert(&self) -> bool {
946        self.skips == 0
947    }
948
949    /// Returns the total number of times the prefilter has been used.
950    #[inline]
951    fn skips(&self) -> u32 {
952        // Remember, `0` is a sentinel value indicating inertness, so we
953        // always need to subtract `1` to get our actual number of skips.
954        self.skips.saturating_sub(1)
955    }
956}
957
958/// A combination of prefilter effectiveness state and the prefilter itself.
959#[derive(Debug)]
960pub(crate) struct Pre<'a> {
961    /// State that tracks the effectiveness of a prefilter.
962    prestate: &'a mut PrefilterState,
963    /// The actual prefilter.
964    prestrat: &'a Prefilter,
965}
966
967impl<'a> Pre<'a> {
968    /// Call this prefilter on the given haystack with the given needle.
969    #[inline]
970    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
971        let result = self.prestrat.find(haystack);
972        self.prestate.update(result.unwrap_or(haystack.len()));
973        result
974    }
975
976    /// Return true if and only if this prefilter should be used.
977    #[inline]
978    pub(crate) fn is_effective(&mut self) -> bool {
979        self.prestate.is_effective()
980    }
981}
982
983/// Returns true if the needle has the right characteristics for a vector
984/// algorithm to handle the entirety of substring search.
985///
986/// Vector algorithms can be used for prefilters for other substring search
987/// algorithms (like Two-Way), but they can also be used for substring search
988/// on their own. When used for substring search, vector algorithms will
989/// quickly identify candidate match positions (just like in the prefilter
990/// case), but instead of returning the candidate position they will try to
991/// confirm the match themselves. Confirmation happens via `memcmp`. This
992/// works well for short needles, but can break down when many false candidate
993/// positions are generated for large needles. Thus, we only permit vector
994/// algorithms to own substring search when the needle is of a certain length.
995#[inline]
996fn do_packed_search(needle: &[u8]) -> bool {
997    /// The minimum length of a needle required for this algorithm. The minimum
998    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
999    /// a case handled by this searcher.
1000    const MIN_LEN: usize = 2;
1001
1002    /// The maximum length of a needle required for this algorithm.
1003    ///
1004    /// In reality, there is no hard max here. The code below can handle any
1005    /// length needle. (Perhaps that suggests there are missing optimizations.)
1006    /// Instead, this is a heuristic and a bound guaranteeing our linear time
1007    /// complexity.
1008    ///
1009    /// It is a heuristic because when a candidate match is found, memcmp is
1010    /// run. For very large needles with lots of false positives, memcmp can
1011    /// make the code run quite slow.
1012    ///
1013    /// It is a bound because the worst case behavior with memcmp is
1014    /// multiplicative in the size of the needle and haystack, and we want
1015    /// to keep that additive. This bound ensures we still meet that bound
1016    /// theoretically, since it's just a constant. We aren't acting in bad
1017    /// faith here, memcmp on tiny needles is so fast that even in pathological
1018    /// cases (see pathological vector benchmarks), this is still just as fast
1019    /// or faster in practice.
1020    ///
1021    /// This specific number was chosen by tweaking a bit and running
1022    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
1023    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
1024    /// There's also a theoretical desire to keep this number reasonably
1025    /// low, to mitigate the impact of pathological cases. I did try 64, and
1026    /// some benchmarks got a little better, and others (particularly the
1027    /// pathological ones), got a lot worse. So... 32 it is?
1028    const MAX_LEN: usize = 32;
1029    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
1030}