memchr/arch/x86_64/avx2/
packedpair.rs

1/*!
2A 256-bit vector implementation of the "packed pair" SIMD algorithm.
3
4The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
5difference is that it (by default) uses a background distribution of byte
6frequencies to heuristically select the pair of bytes to search for.
7
8[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
9*/
10
11use core::arch::x86_64::{__m128i, __m256i};
12
13use crate::arch::{all::packedpair::Pair, generic::packedpair};
14
15/// A "packed pair" finder that uses 256-bit vector operations.
16///
17/// This finder picks two bytes that it believes have high predictive power
18/// for indicating an overall match of a needle. Depending on whether
19/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
20/// where the needle matches or could match. In the prefilter case, candidates
21/// are reported whenever the [`Pair`] of bytes given matches.
22#[derive(Clone, Copy, Debug)]
23pub struct Finder {
24    sse2: packedpair::Finder<__m128i>,
25    avx2: packedpair::Finder<__m256i>,
26}
27
28impl Finder {
29    /// Create a new pair searcher. The searcher returned can either report
30    /// exact matches of `needle` or act as a prefilter and report candidate
31    /// positions of `needle`.
32    ///
33    /// If AVX2 is unavailable in the current environment or if a [`Pair`]
34    /// could not be constructed from the needle given, then `None` is
35    /// returned.
36    #[inline]
37    pub fn new(needle: &[u8]) -> Option<Finder> {
38        Finder::with_pair(needle, Pair::new(needle)?)
39    }
40
41    /// Create a new "packed pair" finder using the pair of bytes given.
42    ///
43    /// This constructor permits callers to control precisely which pair of
44    /// bytes is used as a predicate.
45    ///
46    /// If AVX2 is unavailable in the current environment, then `None` is
47    /// returned.
48    #[inline]
49    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
50        if Finder::is_available() {
51            // SAFETY: we check that sse2/avx2 is available above. We are also
52            // guaranteed to have needle.len() > 1 because we have a valid
53            // Pair.
54            unsafe { Some(Finder::with_pair_impl(needle, pair)) }
55        } else {
56            None
57        }
58    }
59
60    /// Create a new `Finder` specific to SSE2 vectors and routines.
61    ///
62    /// # Safety
63    ///
64    /// Same as the safety for `packedpair::Finder::new`, and callers must also
65    /// ensure that both SSE2 and AVX2 are available.
66    #[target_feature(enable = "sse2", enable = "avx2")]
67    #[inline]
68    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
69        let sse2 = packedpair::Finder::<__m128i>::new(needle, pair);
70        let avx2 = packedpair::Finder::<__m256i>::new(needle, pair);
71        Finder { sse2, avx2 }
72    }
73
74    /// Returns true when this implementation is available in the current
75    /// environment.
76    ///
77    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
78    /// return a `Some` value. Similarly, when it is false, it is guaranteed
79    /// that `Finder::with_pair` will return a `None` value. Notice that this
80    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
81    /// even when `Finder::is_available` is true, it is not guaranteed that a
82    /// valid [`Pair`] can be found from the needle given.
83    ///
84    /// Note also that for the lifetime of a single program, if this returns
85    /// true then it will always return true.
86    #[inline]
87    pub fn is_available() -> bool {
88        #[cfg(not(target_feature = "sse2"))]
89        {
90            false
91        }
92        #[cfg(target_feature = "sse2")]
93        {
94            #[cfg(target_feature = "avx2")]
95            {
96                true
97            }
98            #[cfg(not(target_feature = "avx2"))]
99            {
100                #[cfg(feature = "std")]
101                {
102                    std::is_x86_feature_detected!("avx2")
103                }
104                #[cfg(not(feature = "std"))]
105                {
106                    false
107                }
108            }
109        }
110    }
111
112    /// Execute a search using AVX2 vectors and routines.
113    ///
114    /// # Panics
115    ///
116    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
117    #[inline]
118    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
119        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
120        unsafe { self.find_impl(haystack, needle) }
121    }
122
123    /// Run this finder on the given haystack as a prefilter.
124    ///
125    /// If a candidate match is found, then an offset where the needle *could*
126    /// begin in the haystack is returned.
127    ///
128    /// # Panics
129    ///
130    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
131    #[inline]
132    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
133        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
134        unsafe { self.find_prefilter_impl(haystack) }
135    }
136
137    /// Execute a search using AVX2 vectors and routines.
138    ///
139    /// # Panics
140    ///
141    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
142    ///
143    /// # Safety
144    ///
145    /// (The target feature safety obligation is automatically fulfilled by
146    /// virtue of being a method on `Finder`, which can only be constructed
147    /// when it is safe to call `sse2` and `avx2` routines.)
148    #[target_feature(enable = "sse2", enable = "avx2")]
149    #[inline]
150    unsafe fn find_impl(
151        &self,
152        haystack: &[u8],
153        needle: &[u8],
154    ) -> Option<usize> {
155        if haystack.len() < self.avx2.min_haystack_len() {
156            self.sse2.find(haystack, needle)
157        } else {
158            self.avx2.find(haystack, needle)
159        }
160    }
161
162    /// Execute a prefilter search using AVX2 vectors and routines.
163    ///
164    /// # Panics
165    ///
166    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
167    ///
168    /// # Safety
169    ///
170    /// (The target feature safety obligation is automatically fulfilled by
171    /// virtue of being a method on `Finder`, which can only be constructed
172    /// when it is safe to call `sse2` and `avx2` routines.)
173    #[target_feature(enable = "sse2", enable = "avx2")]
174    #[inline]
175    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
176        if haystack.len() < self.avx2.min_haystack_len() {
177            self.sse2.find_prefilter(haystack)
178        } else {
179            self.avx2.find_prefilter(haystack)
180        }
181    }
182
183    /// Returns the pair of offsets (into the needle) used to check as a
184    /// predicate before confirming whether a needle exists at a particular
185    /// position.
186    #[inline]
187    pub fn pair(&self) -> &Pair {
188        self.avx2.pair()
189    }
190
191    /// Returns the minimum haystack length that this `Finder` can search.
192    ///
193    /// Using a haystack with length smaller than this in a search will result
194    /// in a panic. The reason for this restriction is that this finder is
195    /// meant to be a low-level component that is part of a larger substring
196    /// strategy. In that sense, it avoids trying to handle all cases and
197    /// instead only handles the cases that it can handle very well.
198    #[inline]
199    pub fn min_haystack_len(&self) -> usize {
200        // The caller doesn't need to care about AVX2's min_haystack_len
201        // since this implementation will automatically switch to the SSE2
202        // implementation if the haystack is too short for AVX2. Therefore, the
203        // caller only needs to care about SSE2's min_haystack_len.
204        //
205        // This does assume that SSE2's min_haystack_len is less than or
206        // equal to AVX2's min_haystack_len. In practice, this is true and
207        // there is no way it could be false based on how this Finder is
208        // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If
209        // they used different pairs, then it's possible (although perhaps
210        // pathological) for SSE2's min_haystack_len to be bigger than AVX2's.
211        self.sse2.min_haystack_len()
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// Adapter handed to the substring test runners.
    ///
    /// The outer `None` is produced when no `Finder` can be built for the
    /// needle or when the haystack is shorter than the finder's minimum
    /// (presumably the runners treat that as "not applicable"; confirm
    /// against `crate::tests`). Otherwise the inner value is the search
    /// result.
    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
        let finder = Finder::new(needle)?;
        let long_enough = haystack.len() >= finder.min_haystack_len();
        long_enough.then(|| finder.find(haystack, needle))
    }

    define_substring_forward_quickcheck!(find);

    #[test]
    fn forward_substring() {
        crate::tests::substring::Runner::new().fwd(find).run()
    }

    #[test]
    fn forward_packedpair() {
        /// Same as the module-level adapter, except that the pair is built
        /// from the explicit needle indices supplied by the runner.
        fn search_with_pair(
            haystack: &[u8],
            needle: &[u8],
            index1: u8,
            index2: u8,
        ) -> Option<Option<usize>> {
            let pair = Pair::with_indices(needle, index1, index2)?;
            let finder = Finder::with_pair(needle, pair)?;
            let long_enough = haystack.len() >= finder.min_haystack_len();
            long_enough.then(|| finder.find(haystack, needle))
        }
        crate::tests::packedpair::Runner::new().fwd(search_with_pair).run()
    }

    #[test]
    fn forward_packedpair_prefilter() {
        /// Prefilter variant: reports candidate offsets rather than
        /// confirmed matches.
        fn prefilter_with_pair(
            haystack: &[u8],
            needle: &[u8],
            index1: u8,
            index2: u8,
        ) -> Option<Option<usize>> {
            // Without SSE2 enabled at compile time there is nothing to run.
            if cfg!(not(target_feature = "sse2")) {
                return None;
            }
            let pair = Pair::with_indices(needle, index1, index2)?;
            let finder = Finder::with_pair(needle, pair)?;
            let long_enough = haystack.len() >= finder.min_haystack_len();
            long_enough.then(|| finder.find_prefilter(haystack))
        }
        crate::tests::packedpair::Runner::new().fwd(prefilter_with_pair).run()
    }
}