aho_corasick/packed/teddy/
builder.rs

1use core::{
2    fmt::Debug,
3    panic::{RefUnwindSafe, UnwindSafe},
4};
5
6use alloc::sync::Arc;
7
8use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
9
10/// A builder for constructing a Teddy matcher.
11///
12/// The builder primarily permits fine grained configuration of the Teddy
13/// matcher. Most options are made only available for testing/benchmarking
14/// purposes. In reality, options are automatically determined by the nature
15/// and number of patterns given to the builder.
16#[derive(Clone, Debug)]
17pub(crate) struct Builder {
18    /// When none, this is automatically determined. Otherwise, `false` means
19    /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
20    /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
21    /// available and Fat Teddy was requested, no matcher will be built.
22    only_fat: Option<bool>,
23    /// When none, this is automatically determined. Otherwise, `false` means
24    /// that 128-bit vectors will be used (up to SSSE3 instructions) where as
25    /// `true` means that 256-bit vectors will be used. As with `fat`, if
26    /// 256-bit vectors are requested and they aren't available, then a
27    /// searcher will not be built.
28    only_256bit: Option<bool>,
29    /// When true (the default), the number of patterns will be used as a
30    /// heuristic for refusing construction of a Teddy searcher. The point here
31    /// is that too many patterns can overwhelm Teddy. But this can be disabled
32    /// in cases where the caller knows better.
33    heuristic_pattern_limits: bool,
34}
35
36impl Default for Builder {
37    fn default() -> Builder {
38        Builder::new()
39    }
40}
41
42impl Builder {
43    /// Create a new builder for configuring a Teddy matcher.
44    pub(crate) fn new() -> Builder {
45        Builder {
46            only_fat: None,
47            only_256bit: None,
48            heuristic_pattern_limits: true,
49        }
50    }
51
52    /// Build a matcher for the set of patterns given. If a matcher could not
53    /// be built, then `None` is returned.
54    ///
55    /// Generally, a matcher isn't built if the necessary CPU features aren't
56    /// available, an unsupported target or if the searcher is believed to be
57    /// slower than standard techniques (i.e., if there are too many literals).
58    pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
59        self.build_imp(patterns)
60    }
61
62    /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
63    /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
64    /// for a larger set of literals.
65    ///
66    /// `None` is the default, which results in an automatic selection based
67    /// on the number of literals and available CPU features.
68    pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
69        self.only_fat = yes;
70        self
71    }
72
73    /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
74    /// Generally, a larger vector size is better since it either permits
75    /// matching more patterns or matching more bytes in the haystack at once.
76    ///
77    /// `None` is the default, which results in an automatic selection based on
78    /// the number of literals and available CPU features.
79    pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
80        self.only_256bit = yes;
81        self
82    }
83
84    /// Request that heuristic limitations on the number of patterns be
85    /// employed. This useful to disable for benchmarking where one wants to
86    /// explore how Teddy performs on large number of patterns even if the
87    /// heuristics would otherwise refuse construction.
88    ///
89    /// This is enabled by default.
90    pub(crate) fn heuristic_pattern_limits(
91        &mut self,
92        yes: bool,
93    ) -> &mut Builder {
94        self.heuristic_pattern_limits = yes;
95        self
96    }
97
98    fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
99        let patlimit = self.heuristic_pattern_limits;
100        // There's no particular reason why we limit ourselves to little endian
101        // here, but it seems likely that some parts of Teddy as they are
102        // currently written (e.g., the uses of `trailing_zeros`) are likely
103        // wrong on non-little-endian targets. Such things are likely easy to
104        // fix, but at the time of writing (2023/09/18), I actually do not know
105        // how to test this code on a big-endian target. So for now, we're
106        // conservative and just bail out.
107        if !cfg!(target_endian = "little") {
108            debug!("skipping Teddy because target isn't little endian");
109            return None;
110        }
111        // Too many patterns will overwhelm Teddy and likely lead to slow
112        // downs, typically in the verification step.
113        if patlimit && patterns.len() > 64 {
114            debug!("skipping Teddy because of too many patterns");
115            return None;
116        }
117
118        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
119        {
120            use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
121
122            let mask_len = core::cmp::min(4, patterns.minimum_len());
123            let beefy = patterns.len() > 32;
124            let has_avx2 = self::x86_64::is_available_avx2();
125            let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
126            let use_avx2 = if self.only_256bit == Some(true) {
127                if !has_avx2 {
128                    debug!(
129                    "skipping Teddy because avx2 was demanded but unavailable"
130                );
131                    return None;
132                }
133                true
134            } else if self.only_256bit == Some(false) {
135                if !has_ssse3 {
136                    debug!(
137                    "skipping Teddy because ssse3 was demanded but unavailable"
138                );
139                    return None;
140                }
141                false
142            } else if !has_ssse3 && !has_avx2 {
143                debug!(
144                    "skipping Teddy because ssse3 and avx2 are unavailable"
145                );
146                return None;
147            } else {
148                has_avx2
149            };
150            let fat = match self.only_fat {
151                None => use_avx2 && beefy,
152                Some(false) => false,
153                Some(true) if !use_avx2 => {
154                    debug!(
155                        "skipping Teddy because fat was demanded, but fat \
156                         Teddy requires avx2 which is unavailable"
157                    );
158                    return None;
159                }
160                Some(true) => true,
161            };
162            // Just like for aarch64, it's possible that too many patterns will
163            // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
164            // helps things scale a bit more by spreading patterns over more
165            // buckets.
166            //
167            // These thresholds were determined by looking at the measurements
168            // for the rust/aho-corasick/packed/leftmost-first and
169            // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
170            // benchmarks.
171            if patlimit && mask_len == 1 && patterns.len() > 16 {
172                debug!(
173                    "skipping Teddy (mask len: 1) because there are \
174                             too many patterns",
175                );
176                return None;
177            }
178            match (mask_len, use_avx2, fat) {
179                (1, false, _) => {
180                    debug!("Teddy choice: 128-bit slim, 1 byte");
181                    SlimSSSE3::<1>::new(&patterns)
182                }
183                (1, true, false) => {
184                    debug!("Teddy choice: 256-bit slim, 1 byte");
185                    SlimAVX2::<1>::new(&patterns)
186                }
187                (1, true, true) => {
188                    debug!("Teddy choice: 256-bit fat, 1 byte");
189                    FatAVX2::<1>::new(&patterns)
190                }
191                (2, false, _) => {
192                    debug!("Teddy choice: 128-bit slim, 2 bytes");
193                    SlimSSSE3::<2>::new(&patterns)
194                }
195                (2, true, false) => {
196                    debug!("Teddy choice: 256-bit slim, 2 bytes");
197                    SlimAVX2::<2>::new(&patterns)
198                }
199                (2, true, true) => {
200                    debug!("Teddy choice: 256-bit fat, 2 bytes");
201                    FatAVX2::<2>::new(&patterns)
202                }
203                (3, false, _) => {
204                    debug!("Teddy choice: 128-bit slim, 3 bytes");
205                    SlimSSSE3::<3>::new(&patterns)
206                }
207                (3, true, false) => {
208                    debug!("Teddy choice: 256-bit slim, 3 bytes");
209                    SlimAVX2::<3>::new(&patterns)
210                }
211                (3, true, true) => {
212                    debug!("Teddy choice: 256-bit fat, 3 bytes");
213                    FatAVX2::<3>::new(&patterns)
214                }
215                (4, false, _) => {
216                    debug!("Teddy choice: 128-bit slim, 4 bytes");
217                    SlimSSSE3::<4>::new(&patterns)
218                }
219                (4, true, false) => {
220                    debug!("Teddy choice: 256-bit slim, 4 bytes");
221                    SlimAVX2::<4>::new(&patterns)
222                }
223                (4, true, true) => {
224                    debug!("Teddy choice: 256-bit fat, 4 bytes");
225                    FatAVX2::<4>::new(&patterns)
226                }
227                _ => {
228                    debug!("no supported Teddy configuration found");
229                    None
230                }
231            }
232        }
233        #[cfg(all(
234            target_arch = "aarch64",
235            target_feature = "neon",
236            target_endian = "little"
237        ))]
238        {
239            use self::aarch64::SlimNeon;
240
241            let mask_len = core::cmp::min(4, patterns.minimum_len());
242            if self.only_256bit == Some(true) {
243                debug!(
244                    "skipping Teddy because 256-bits were demanded \
245                     but unavailable"
246                );
247                return None;
248            }
249            if self.only_fat == Some(true) {
250                debug!(
251                    "skipping Teddy because fat was demanded but unavailable"
252                );
253            }
254            // Since we don't have Fat teddy in aarch64 (I think we'd want at
255            // least 256-bit vectors for that), we need to be careful not to
256            // allow too many patterns as it might overwhelm Teddy. Generally
257            // speaking, as the mask length goes up, the more patterns we can
258            // handle because the mask length results in fewer candidates
259            // generated.
260            //
261            // These thresholds were determined by looking at the measurements
262            // for the rust/aho-corasick/packed/leftmost-first and
263            // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
264            // benchmarks.
265            match mask_len {
266                1 => {
267                    if patlimit && patterns.len() > 16 {
268                        debug!(
269                            "skipping Teddy (mask len: 1) because there are \
270                             too many patterns",
271                        );
272                    }
273                    debug!("Teddy choice: 128-bit slim, 1 byte");
274                    SlimNeon::<1>::new(&patterns)
275                }
276                2 => {
277                    if patlimit && patterns.len() > 32 {
278                        debug!(
279                            "skipping Teddy (mask len: 2) because there are \
280                             too many patterns",
281                        );
282                    }
283                    debug!("Teddy choice: 128-bit slim, 2 bytes");
284                    SlimNeon::<2>::new(&patterns)
285                }
286                3 => {
287                    if patlimit && patterns.len() > 48 {
288                        debug!(
289                            "skipping Teddy (mask len: 3) because there are \
290                             too many patterns",
291                        );
292                    }
293                    debug!("Teddy choice: 128-bit slim, 3 bytes");
294                    SlimNeon::<3>::new(&patterns)
295                }
296                4 => {
297                    debug!("Teddy choice: 128-bit slim, 4 bytes");
298                    SlimNeon::<4>::new(&patterns)
299                }
300                _ => {
301                    debug!("no supported Teddy configuration found");
302                    None
303                }
304            }
305        }
306        #[cfg(not(any(
307            all(target_arch = "x86_64", target_feature = "sse2"),
308            all(
309                target_arch = "aarch64",
310                target_feature = "neon",
311                target_endian = "little"
312            )
313        )))]
314        {
315            None
316        }
317    }
318}
319
320/// A searcher that dispatches to one of several possible Teddy variants.
321#[derive(Clone, Debug)]
322pub(crate) struct Searcher {
323    /// The Teddy variant we use. We use dynamic dispatch under the theory that
324    /// it results in better codegen then a enum, although this is a specious
325    /// claim.
326    ///
327    /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
328    /// object. We just make `memory_usage` and `minimum_len` available without
329    /// going through dynamic dispatch.
330    imp: Arc<dyn SearcherT>,
331    /// Total heap memory used by the Teddy variant.
332    memory_usage: usize,
333    /// The minimum haystack length this searcher can handle. It is intended
334    /// for callers to use some other search routine (such as Rabin-Karp) in
335    /// cases where the haystack (or remainer of the haystack) is too short.
336    minimum_len: usize,
337}
338
339impl Searcher {
340    /// Look for the leftmost occurrence of any pattern in this search in the
341    /// given haystack starting at the given position.
342    ///
343    /// # Panics
344    ///
345    /// This panics when `haystack[at..].len()` is less than the minimum length
346    /// for this haystack.
347    #[inline(always)]
348    pub(crate) fn find(
349        &self,
350        haystack: &[u8],
351        at: usize,
352    ) -> Option<crate::Match> {
353        // SAFETY: The Teddy implementations all require a minimum haystack
354        // length, and this is required for safety. Therefore, we assert it
355        // here in order to make this method sound.
356        assert!(haystack[at..].len() >= self.minimum_len);
357        let hayptr = haystack.as_ptr();
358        // SAFETY: Construction of the searcher guarantees that we are able
359        // to run it in the current environment (i.e., we won't get an AVX2
360        // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
361        // are valid as they are derived directly from a borrowed slice.
362        let teddym = unsafe {
363            self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
364        };
365        let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
366        let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
367        let span = crate::Span { start, end };
368        // OK because we won't permit the construction of a searcher that
369        // could report a pattern ID bigger than what can fit in the crate-wide
370        // PatternID type.
371        let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
372        let m = crate::Match::new(pid, span);
373        Some(m)
374    }
375
376    /// Returns the approximate total amount of heap used by this type, in
377    /// units of bytes.
378    #[inline(always)]
379    pub(crate) fn memory_usage(&self) -> usize {
380        self.memory_usage
381    }
382
383    /// Returns the minimum length, in bytes, that a haystack must be in order
384    /// to use it with this searcher.
385    #[inline(always)]
386    pub(crate) fn minimum_len(&self) -> usize {
387        self.minimum_len
388    }
389}
390
391/// A trait that provides dynamic dispatch over the different possible Teddy
392/// variants on the same algorithm.
393///
394/// On `x86_64` for example, it isn't known until runtime which of 12 possible
395/// variants will be used. One might use one of the four slim 128-bit vector
396/// variants, or one of the four 256-bit vector variants or even one of the
397/// four fat 256-bit vector variants.
398///
399/// Since this choice is generally made when the Teddy searcher is constructed
400/// and this choice is based on the patterns given and what the current CPU
401/// supports, it follows that there must be some kind of indirection at search
402/// time that "selects" the variant chosen at build time.
403///
404/// There are a few different ways to go about this. One approach is to use an
405/// enum. It works fine, but in my experiments, this generally results in worse
406/// codegen. Another approach, which is what we use here, is dynamic dispatch
407/// via a trait object. We basically implement this trait for each possible
408/// variant, select the variant we want at build time and convert it to a
409/// trait object for use at search time.
410///
411/// Another approach is to use function pointers and stick each of the possible
412/// variants into a union. This is essentially isomorphic to the dynamic
413/// dispatch approach, but doesn't require any allocations. Since this crate
414/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
415/// `memchr` crate does this.)
416trait SearcherT:
417    Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
418{
419    /// Execute a search on the given haystack (identified by `start` and `end`
420    /// raw pointers).
421    ///
422    /// # Safety
423    ///
424    /// Essentially, the `start` and `end` pointers must be valid and point
425    /// to a haystack one can read. As long as you derive them from, for
426    /// example, a `&[u8]`, they should automatically satisfy all of the safety
427    /// obligations:
428    ///
429    /// * Both `start` and `end` must be valid for reads.
430    /// * Both `start` and `end` must point to an initialized value.
431    /// * Both `start` and `end` must point to the same allocated object and
432    /// must either be in bounds or at most one byte past the end of the
433    /// allocated object.
434    /// * Both `start` and `end` must be _derived from_ a pointer to the same
435    /// object.
436    /// * The distance between `start` and `end` must not overflow `isize`.
437    /// * The distance being in bounds must not rely on "wrapping around" the
438    /// address space.
439    /// * It must be the case that `start <= end`.
440    /// * `end - start` must be greater than the minimum length for this
441    /// searcher.
442    ///
443    /// Also, it is expected that implementations of this trait will tag this
444    /// method with a `target_feature` attribute. Callers must ensure that
445    /// they are executing this method in an environment where that attribute
446    /// is valid.
447    unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
448}
449
450#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
451mod x86_64 {
452    use core::arch::x86_64::{__m128i, __m256i};
453
454    use alloc::sync::Arc;
455
456    use crate::packed::{
457        ext::Pointer,
458        pattern::Patterns,
459        teddy::generic::{self, Match},
460    };
461
462    use super::{Searcher, SearcherT};
463
464    #[derive(Clone, Debug)]
465    pub(super) struct SlimSSSE3<const BYTES: usize> {
466        slim128: generic::Slim<__m128i, BYTES>,
467    }
468
469    // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
470    macro_rules! slim_ssse3 {
471        ($len:expr) => {
472            impl SlimSSSE3<$len> {
473                /// Creates a new searcher using "slim" Teddy with 128-bit
474                /// vectors. If SSSE3 is not available in the current
475                /// environment, then this returns `None`.
476                pub(super) fn new(
477                    patterns: &Arc<Patterns>,
478                ) -> Option<Searcher> {
479                    if !is_available_ssse3() {
480                        return None;
481                    }
482                    Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
483                }
484
485                /// Creates a new searcher using "slim" Teddy with 256-bit
486                /// vectors without checking whether SSSE3 is available or not.
487                ///
488                /// # Safety
489                ///
490                /// Callers must ensure that SSSE3 is available in the current
491                /// environment.
492                #[target_feature(enable = "ssse3")]
493                unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
494                    let slim128 = generic::Slim::<__m128i, $len>::new(
495                        Arc::clone(patterns),
496                    );
497                    let memory_usage = slim128.memory_usage();
498                    let minimum_len = slim128.minimum_len();
499                    let imp = Arc::new(SlimSSSE3 { slim128 });
500                    Searcher { imp, memory_usage, minimum_len }
501                }
502            }
503
504            impl SearcherT for SlimSSSE3<$len> {
505                #[target_feature(enable = "ssse3")]
506                #[inline]
507                unsafe fn find(
508                    &self,
509                    start: *const u8,
510                    end: *const u8,
511                ) -> Option<Match> {
512                    // SAFETY: All obligations except for `target_feature` are
513                    // passed to the caller. Our use of `target_feature` is
514                    // safe because construction of this type requires that the
515                    // requisite target features are available.
516                    self.slim128.find(start, end)
517                }
518            }
519        };
520    }
521
522    slim_ssse3!(1);
523    slim_ssse3!(2);
524    slim_ssse3!(3);
525    slim_ssse3!(4);
526
527    #[derive(Clone, Debug)]
528    pub(super) struct SlimAVX2<const BYTES: usize> {
529        slim128: generic::Slim<__m128i, BYTES>,
530        slim256: generic::Slim<__m256i, BYTES>,
531    }
532
533    // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
534    macro_rules! slim_avx2 {
535        ($len:expr) => {
536            impl SlimAVX2<$len> {
537                /// Creates a new searcher using "slim" Teddy with 256-bit
538                /// vectors. If AVX2 is not available in the current
539                /// environment, then this returns `None`.
540                pub(super) fn new(
541                    patterns: &Arc<Patterns>,
542                ) -> Option<Searcher> {
543                    if !is_available_avx2() {
544                        return None;
545                    }
546                    Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
547                }
548
549                /// Creates a new searcher using "slim" Teddy with 256-bit
550                /// vectors without checking whether AVX2 is available or not.
551                ///
552                /// # Safety
553                ///
554                /// Callers must ensure that AVX2 is available in the current
555                /// environment.
556                #[target_feature(enable = "avx2")]
557                unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
558                    let slim128 = generic::Slim::<__m128i, $len>::new(
559                        Arc::clone(&patterns),
560                    );
561                    let slim256 = generic::Slim::<__m256i, $len>::new(
562                        Arc::clone(&patterns),
563                    );
564                    let memory_usage =
565                        slim128.memory_usage() + slim256.memory_usage();
566                    let minimum_len = slim128.minimum_len();
567                    let imp = Arc::new(SlimAVX2 { slim128, slim256 });
568                    Searcher { imp, memory_usage, minimum_len }
569                }
570            }
571
572            impl SearcherT for SlimAVX2<$len> {
573                #[target_feature(enable = "avx2")]
574                #[inline]
575                unsafe fn find(
576                    &self,
577                    start: *const u8,
578                    end: *const u8,
579                ) -> Option<Match> {
580                    // SAFETY: All obligations except for `target_feature` are
581                    // passed to the caller. Our use of `target_feature` is
582                    // safe because construction of this type requires that the
583                    // requisite target features are available.
584                    let len = end.distance(start);
585                    if len < self.slim256.minimum_len() {
586                        self.slim128.find(start, end)
587                    } else {
588                        self.slim256.find(start, end)
589                    }
590                }
591            }
592        };
593    }
594
595    slim_avx2!(1);
596    slim_avx2!(2);
597    slim_avx2!(3);
598    slim_avx2!(4);
599
600    #[derive(Clone, Debug)]
601    pub(super) struct FatAVX2<const BYTES: usize> {
602        fat256: generic::Fat<__m256i, BYTES>,
603    }
604
605    // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
606    macro_rules! fat_avx2 {
607        ($len:expr) => {
608            impl FatAVX2<$len> {
609                /// Creates a new searcher using "slim" Teddy with 256-bit
610                /// vectors. If AVX2 is not available in the current
611                /// environment, then this returns `None`.
612                pub(super) fn new(
613                    patterns: &Arc<Patterns>,
614                ) -> Option<Searcher> {
615                    if !is_available_avx2() {
616                        return None;
617                    }
618                    Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
619                }
620
621                /// Creates a new searcher using "slim" Teddy with 256-bit
622                /// vectors without checking whether AVX2 is available or not.
623                ///
624                /// # Safety
625                ///
626                /// Callers must ensure that AVX2 is available in the current
627                /// environment.
628                #[target_feature(enable = "avx2")]
629                unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
630                    let fat256 = generic::Fat::<__m256i, $len>::new(
631                        Arc::clone(&patterns),
632                    );
633                    let memory_usage = fat256.memory_usage();
634                    let minimum_len = fat256.minimum_len();
635                    let imp = Arc::new(FatAVX2 { fat256 });
636                    Searcher { imp, memory_usage, minimum_len }
637                }
638            }
639
640            impl SearcherT for FatAVX2<$len> {
641                #[target_feature(enable = "avx2")]
642                #[inline]
643                unsafe fn find(
644                    &self,
645                    start: *const u8,
646                    end: *const u8,
647                ) -> Option<Match> {
648                    // SAFETY: All obligations except for `target_feature` are
649                    // passed to the caller. Our use of `target_feature` is
650                    // safe because construction of this type requires that the
651                    // requisite target features are available.
652                    self.fat256.find(start, end)
653                }
654            }
655        };
656    }
657
658    fat_avx2!(1);
659    fat_avx2!(2);
660    fat_avx2!(3);
661    fat_avx2!(4);
662
663    #[inline]
664    pub(super) fn is_available_ssse3() -> bool {
665        #[cfg(not(target_feature = "sse2"))]
666        {
667            false
668        }
669        #[cfg(target_feature = "sse2")]
670        {
671            #[cfg(target_feature = "ssse3")]
672            {
673                true
674            }
675            #[cfg(not(target_feature = "ssse3"))]
676            {
677                #[cfg(feature = "std")]
678                {
679                    std::is_x86_feature_detected!("ssse3")
680                }
681                #[cfg(not(feature = "std"))]
682                {
683                    false
684                }
685            }
686        }
687    }
688
689    #[inline]
690    pub(super) fn is_available_avx2() -> bool {
691        #[cfg(not(target_feature = "sse2"))]
692        {
693            false
694        }
695        #[cfg(target_feature = "sse2")]
696        {
697            #[cfg(target_feature = "avx2")]
698            {
699                true
700            }
701            #[cfg(not(target_feature = "avx2"))]
702            {
703                #[cfg(feature = "std")]
704                {
705                    std::is_x86_feature_detected!("avx2")
706                }
707                #[cfg(not(feature = "std"))]
708                {
709                    false
710                }
711            }
712        }
713    }
714}
715
716#[cfg(all(
717    target_arch = "aarch64",
718    target_feature = "neon",
719    target_endian = "little"
720))]
721mod aarch64 {
722    use core::arch::aarch64::uint8x16_t;
723
724    use alloc::sync::Arc;
725
726    use crate::packed::{
727        pattern::Patterns,
728        teddy::generic::{self, Match},
729    };
730
731    use super::{Searcher, SearcherT};
732
733    #[derive(Clone, Debug)]
734    pub(super) struct SlimNeon<const BYTES: usize> {
735        slim128: generic::Slim<uint8x16_t, BYTES>,
736    }
737
738    // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
739    macro_rules! slim_neon {
740        ($len:expr) => {
741            impl SlimNeon<$len> {
742                /// Creates a new searcher using "slim" Teddy with 128-bit
743                /// vectors. If SSSE3 is not available in the current
744                /// environment, then this returns `None`.
745                pub(super) fn new(
746                    patterns: &Arc<Patterns>,
747                ) -> Option<Searcher> {
748                    Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
749                }
750
751                /// Creates a new searcher using "slim" Teddy with 256-bit
752                /// vectors without checking whether SSSE3 is available or not.
753                ///
754                /// # Safety
755                ///
756                /// Callers must ensure that SSSE3 is available in the current
757                /// environment.
758                #[target_feature(enable = "neon")]
759                unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
760                    let slim128 = generic::Slim::<uint8x16_t, $len>::new(
761                        Arc::clone(patterns),
762                    );
763                    let memory_usage = slim128.memory_usage();
764                    let minimum_len = slim128.minimum_len();
765                    let imp = Arc::new(SlimNeon { slim128 });
766                    Searcher { imp, memory_usage, minimum_len }
767                }
768            }
769
770            impl SearcherT for SlimNeon<$len> {
771                #[target_feature(enable = "neon")]
772                #[inline]
773                unsafe fn find(
774                    &self,
775                    start: *const u8,
776                    end: *const u8,
777                ) -> Option<Match> {
778                    // SAFETY: All obligations except for `target_feature` are
779                    // passed to the caller. Our use of `target_feature` is
780                    // safe because construction of this type requires that the
781                    // requisite target features are available.
782                    self.slim128.find(start, end)
783                }
784            }
785        };
786    }
787
788    slim_neon!(1);
789    slim_neon!(2);
790    slim_neon!(3);
791    slim_neon!(4);
792}