memchr/arch/x86_64/sse2/
memchr.rs

1/*!
2This module defines 128-bit vector implementations of `memchr` and friends.
3
4The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
5searching for one, two or three distinct bytes, respectively, in a haystack.
6Each type also has corresponding double ended iterators. These searchers are
7typically much faster than scalar routines accomplishing the same task.
8
9The `One` searcher also provides a [`One::count`] routine for efficiently
10counting the number of times a single byte occurs in a haystack. This is
11useful, for example, for counting the number of lines in a haystack. This
12routine exists because it is usually faster, especially with a high match
13count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
14`Iterator::count` implementation to use this routine.)
15
16Only one, two and three bytes are supported because three bytes is about
17the point where one sees diminishing returns. Beyond this point and it's
18probably (but not necessarily) better to just use a simple `[bool; 256]` array
19or similar. However, it depends mightily on the specific work-load and the
20expected match frequency.
21*/
22
23use core::arch::x86_64::__m128i;
24
25use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
26
27/// Finds all occurrences of a single byte in a haystack.
28#[derive(Clone, Copy, Debug)]
29pub struct One(generic::One<__m128i>);
30
31impl One {
32    /// Create a new searcher that finds occurrences of the needle byte given.
33    ///
34    /// This particular searcher is specialized to use SSE2 vector instructions
35    /// that typically make it quite fast.
36    ///
37    /// If SSE2 is unavailable in the current environment, then `None` is
38    /// returned.
39    #[inline]
40    pub fn new(needle: u8) -> Option<One> {
41        if One::is_available() {
42            // SAFETY: we check that sse2 is available above.
43            unsafe { Some(One::new_unchecked(needle)) }
44        } else {
45            None
46        }
47    }
48
49    /// Create a new finder specific to SSE2 vectors and routines without
50    /// checking that SSE2 is available.
51    ///
52    /// # Safety
53    ///
54    /// Callers must guarantee that it is safe to execute `sse2` instructions
55    /// in the current environment.
56    ///
57    /// Note that it is a common misconception that if one compiles for an
58    /// `x86_64` target, then they therefore automatically have access to SSE2
59    /// instructions. While this is almost always the case, it isn't true in
60    /// 100% of cases.
61    #[target_feature(enable = "sse2")]
62    #[inline]
63    pub unsafe fn new_unchecked(needle: u8) -> One {
64        One(generic::One::new(needle))
65    }
66
67    /// Returns true when this implementation is available in the current
68    /// environment.
69    ///
70    /// When this is true, it is guaranteed that [`One::new`] will return
71    /// a `Some` value. Similarly, when it is false, it is guaranteed that
72    /// `One::new` will return a `None` value.
73    ///
74    /// Note also that for the lifetime of a single program, if this returns
75    /// true then it will always return true.
76    #[inline]
77    pub fn is_available() -> bool {
78        #[cfg(target_feature = "sse2")]
79        {
80            true
81        }
82        #[cfg(not(target_feature = "sse2"))]
83        {
84            false
85        }
86    }
87
88    /// Return the first occurrence of one of the needle bytes in the given
89    /// haystack. If no such occurrence exists, then `None` is returned.
90    ///
91    /// The occurrence is reported as an offset into `haystack`. Its maximum
92    /// value is `haystack.len() - 1`.
93    #[inline]
94    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
95        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
96        // falls within the bounds of the start and end pointers.
97        unsafe {
98            generic::search_slice_with_raw(haystack, |s, e| {
99                self.find_raw(s, e)
100            })
101        }
102    }
103
104    /// Return the last occurrence of one of the needle bytes in the given
105    /// haystack. If no such occurrence exists, then `None` is returned.
106    ///
107    /// The occurrence is reported as an offset into `haystack`. Its maximum
108    /// value is `haystack.len() - 1`.
109    #[inline]
110    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
111        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
112        // falls within the bounds of the start and end pointers.
113        unsafe {
114            generic::search_slice_with_raw(haystack, |s, e| {
115                self.rfind_raw(s, e)
116            })
117        }
118    }
119
120    /// Counts all occurrences of this byte in the given haystack.
121    #[inline]
122    pub fn count(&self, haystack: &[u8]) -> usize {
123        // SAFETY: All of our pointers are derived directly from a borrowed
124        // slice, which is guaranteed to be valid.
125        unsafe {
126            let start = haystack.as_ptr();
127            let end = start.add(haystack.len());
128            self.count_raw(start, end)
129        }
130    }
131
132    /// Like `find`, but accepts and returns raw pointers.
133    ///
134    /// When a match is found, the pointer returned is guaranteed to be
135    /// `>= start` and `< end`.
136    ///
137    /// This routine is useful if you're already using raw pointers and would
138    /// like to avoid converting back to a slice before executing a search.
139    ///
140    /// # Safety
141    ///
142    /// * Both `start` and `end` must be valid for reads.
143    /// * Both `start` and `end` must point to an initialized value.
144    /// * Both `start` and `end` must point to the same allocated object and
145    /// must either be in bounds or at most one byte past the end of the
146    /// allocated object.
147    /// * Both `start` and `end` must be _derived from_ a pointer to the same
148    /// object.
149    /// * The distance between `start` and `end` must not overflow `isize`.
150    /// * The distance being in bounds must not rely on "wrapping around" the
151    /// address space.
152    ///
153    /// Note that callers may pass a pair of pointers such that `start >= end`.
154    /// In that case, `None` will always be returned.
155    #[inline]
156    pub unsafe fn find_raw(
157        &self,
158        start: *const u8,
159        end: *const u8,
160    ) -> Option<*const u8> {
161        if start >= end {
162            return None;
163        }
164        if end.distance(start) < __m128i::BYTES {
165            // SAFETY: We require the caller to pass valid start/end pointers.
166            return generic::fwd_byte_by_byte(start, end, |b| {
167                b == self.0.needle1()
168            });
169        }
170        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
171        // Also, we've checked that our haystack is big enough to run on the
172        // vector routine. Pointer validity is caller's responsibility.
173        //
174        // Note that we could call `self.0.find_raw` directly here. But that
175        // means we'd have to annotate this routine with `target_feature`.
176        // Which is fine, because this routine is `unsafe` anyway and the
177        // `target_feature` obligation is met by virtue of building a `One`.
178        // The real problem is that a routine with a `target_feature`
179        // annotation generally can't be inlined into caller code unless the
180        // caller code has the same target feature annotations. Which is maybe
181        // okay for SSE2, but we do the same thing for AVX2 where caller code
182        // probably usually doesn't have AVX2 enabled. That means that this
183        // routine can be inlined which will handle some of the short-haystack
184        // cases above without touching the architecture specific code.
185        self.find_raw_impl(start, end)
186    }
187
188    /// Like `rfind`, but accepts and returns raw pointers.
189    ///
190    /// When a match is found, the pointer returned is guaranteed to be
191    /// `>= start` and `< end`.
192    ///
193    /// This routine is useful if you're already using raw pointers and would
194    /// like to avoid converting back to a slice before executing a search.
195    ///
196    /// # Safety
197    ///
198    /// * Both `start` and `end` must be valid for reads.
199    /// * Both `start` and `end` must point to an initialized value.
200    /// * Both `start` and `end` must point to the same allocated object and
201    /// must either be in bounds or at most one byte past the end of the
202    /// allocated object.
203    /// * Both `start` and `end` must be _derived from_ a pointer to the same
204    /// object.
205    /// * The distance between `start` and `end` must not overflow `isize`.
206    /// * The distance being in bounds must not rely on "wrapping around" the
207    /// address space.
208    ///
209    /// Note that callers may pass a pair of pointers such that `start >= end`.
210    /// In that case, `None` will always be returned.
211    #[inline]
212    pub unsafe fn rfind_raw(
213        &self,
214        start: *const u8,
215        end: *const u8,
216    ) -> Option<*const u8> {
217        if start >= end {
218            return None;
219        }
220        if end.distance(start) < __m128i::BYTES {
221            // SAFETY: We require the caller to pass valid start/end pointers.
222            return generic::rev_byte_by_byte(start, end, |b| {
223                b == self.0.needle1()
224            });
225        }
226        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
227        // Also, we've checked that our haystack is big enough to run on the
228        // vector routine. Pointer validity is caller's responsibility.
229        //
230        // See note in forward routine above for why we don't just call
231        // `self.0.rfind_raw` directly here.
232        self.rfind_raw_impl(start, end)
233    }
234
235    /// Counts all occurrences of this byte in the given haystack represented
236    /// by raw pointers.
237    ///
238    /// This routine is useful if you're already using raw pointers and would
239    /// like to avoid converting back to a slice before executing a search.
240    ///
241    /// # Safety
242    ///
243    /// * Both `start` and `end` must be valid for reads.
244    /// * Both `start` and `end` must point to an initialized value.
245    /// * Both `start` and `end` must point to the same allocated object and
246    /// must either be in bounds or at most one byte past the end of the
247    /// allocated object.
248    /// * Both `start` and `end` must be _derived from_ a pointer to the same
249    /// object.
250    /// * The distance between `start` and `end` must not overflow `isize`.
251    /// * The distance being in bounds must not rely on "wrapping around" the
252    /// address space.
253    ///
254    /// Note that callers may pass a pair of pointers such that `start >= end`.
255    /// In that case, `0` will always be returned.
256    #[inline]
257    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
258        if start >= end {
259            return 0;
260        }
261        if end.distance(start) < __m128i::BYTES {
262            // SAFETY: We require the caller to pass valid start/end pointers.
263            return generic::count_byte_by_byte(start, end, |b| {
264                b == self.0.needle1()
265            });
266        }
267        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
268        // Also, we've checked that our haystack is big enough to run on the
269        // vector routine. Pointer validity is caller's responsibility.
270        self.count_raw_impl(start, end)
271    }
272
273    /// Execute a search using SSE2 vectors and routines.
274    ///
275    /// # Safety
276    ///
277    /// Same as [`One::find_raw`], except the distance between `start` and
278    /// `end` must be at least the size of an SSE2 vector (in bytes).
279    ///
280    /// (The target feature safety obligation is automatically fulfilled by
281    /// virtue of being a method on `One`, which can only be constructed
282    /// when it is safe to call `sse2` routines.)
283    #[target_feature(enable = "sse2")]
284    #[inline]
285    unsafe fn find_raw_impl(
286        &self,
287        start: *const u8,
288        end: *const u8,
289    ) -> Option<*const u8> {
290        self.0.find_raw(start, end)
291    }
292
293    /// Execute a search using SSE2 vectors and routines.
294    ///
295    /// # Safety
296    ///
297    /// Same as [`One::rfind_raw`], except the distance between `start` and
298    /// `end` must be at least the size of an SSE2 vector (in bytes).
299    ///
300    /// (The target feature safety obligation is automatically fulfilled by
301    /// virtue of being a method on `One`, which can only be constructed
302    /// when it is safe to call `sse2` routines.)
303    #[target_feature(enable = "sse2")]
304    #[inline]
305    unsafe fn rfind_raw_impl(
306        &self,
307        start: *const u8,
308        end: *const u8,
309    ) -> Option<*const u8> {
310        self.0.rfind_raw(start, end)
311    }
312
313    /// Execute a count using SSE2 vectors and routines.
314    ///
315    /// # Safety
316    ///
317    /// Same as [`One::count_raw`], except the distance between `start` and
318    /// `end` must be at least the size of an SSE2 vector (in bytes).
319    ///
320    /// (The target feature safety obligation is automatically fulfilled by
321    /// virtue of being a method on `One`, which can only be constructed
322    /// when it is safe to call `sse2` routines.)
323    #[target_feature(enable = "sse2")]
324    #[inline]
325    unsafe fn count_raw_impl(
326        &self,
327        start: *const u8,
328        end: *const u8,
329    ) -> usize {
330        self.0.count_raw(start, end)
331    }
332
333    /// Returns an iterator over all occurrences of the needle byte in the
334    /// given haystack.
335    ///
336    /// The iterator returned implements `DoubleEndedIterator`. This means it
337    /// can also be used to find occurrences in reverse order.
338    #[inline]
339    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
340        OneIter { searcher: self, it: generic::Iter::new(haystack) }
341    }
342}
343
344/// An iterator over all occurrences of a single byte in a haystack.
345///
346/// This iterator implements `DoubleEndedIterator`, which means it can also be
347/// used to find occurrences in reverse order.
348///
349/// This iterator is created by the [`One::iter`] method.
350///
351/// The lifetime parameters are as follows:
352///
353/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
354/// * `'h` refers to the lifetime of the haystack being searched.
355#[derive(Clone, Debug)]
356pub struct OneIter<'a, 'h> {
357    searcher: &'a One,
358    it: generic::Iter<'h>,
359}
360
361impl<'a, 'h> Iterator for OneIter<'a, 'h> {
362    type Item = usize;
363
364    #[inline]
365    fn next(&mut self) -> Option<usize> {
366        // SAFETY: We rely on the generic iterator to provide valid start
367        // and end pointers, but we guarantee that any pointer returned by
368        // 'find_raw' falls within the bounds of the start and end pointer.
369        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
370    }
371
372    #[inline]
373    fn count(self) -> usize {
374        self.it.count(|s, e| {
375            // SAFETY: We rely on our generic iterator to return valid start
376            // and end pointers.
377            unsafe { self.searcher.count_raw(s, e) }
378        })
379    }
380
381    #[inline]
382    fn size_hint(&self) -> (usize, Option<usize>) {
383        self.it.size_hint()
384    }
385}
386
387impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
388    #[inline]
389    fn next_back(&mut self) -> Option<usize> {
390        // SAFETY: We rely on the generic iterator to provide valid start
391        // and end pointers, but we guarantee that any pointer returned by
392        // 'rfind_raw' falls within the bounds of the start and end pointer.
393        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
394    }
395}
396
397impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
398
399/// Finds all occurrences of two bytes in a haystack.
400///
401/// That is, this reports matches of one of two possible bytes. For example,
402/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
403/// `4` and `5`.
404#[derive(Clone, Copy, Debug)]
405pub struct Two(generic::Two<__m128i>);
406
407impl Two {
408    /// Create a new searcher that finds occurrences of the needle bytes given.
409    ///
410    /// This particular searcher is specialized to use SSE2 vector instructions
411    /// that typically make it quite fast.
412    ///
413    /// If SSE2 is unavailable in the current environment, then `None` is
414    /// returned.
415    #[inline]
416    pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
417        if Two::is_available() {
418            // SAFETY: we check that sse2 is available above.
419            unsafe { Some(Two::new_unchecked(needle1, needle2)) }
420        } else {
421            None
422        }
423    }
424
425    /// Create a new finder specific to SSE2 vectors and routines without
426    /// checking that SSE2 is available.
427    ///
428    /// # Safety
429    ///
430    /// Callers must guarantee that it is safe to execute `sse2` instructions
431    /// in the current environment.
432    ///
433    /// Note that it is a common misconception that if one compiles for an
434    /// `x86_64` target, then they therefore automatically have access to SSE2
435    /// instructions. While this is almost always the case, it isn't true in
436    /// 100% of cases.
437    #[target_feature(enable = "sse2")]
438    #[inline]
439    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
440        Two(generic::Two::new(needle1, needle2))
441    }
442
443    /// Returns true when this implementation is available in the current
444    /// environment.
445    ///
446    /// When this is true, it is guaranteed that [`Two::new`] will return
447    /// a `Some` value. Similarly, when it is false, it is guaranteed that
448    /// `Two::new` will return a `None` value.
449    ///
450    /// Note also that for the lifetime of a single program, if this returns
451    /// true then it will always return true.
452    #[inline]
453    pub fn is_available() -> bool {
454        #[cfg(target_feature = "sse2")]
455        {
456            true
457        }
458        #[cfg(not(target_feature = "sse2"))]
459        {
460            false
461        }
462    }
463
464    /// Return the first occurrence of one of the needle bytes in the given
465    /// haystack. If no such occurrence exists, then `None` is returned.
466    ///
467    /// The occurrence is reported as an offset into `haystack`. Its maximum
468    /// value is `haystack.len() - 1`.
469    #[inline]
470    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
471        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
472        // falls within the bounds of the start and end pointers.
473        unsafe {
474            generic::search_slice_with_raw(haystack, |s, e| {
475                self.find_raw(s, e)
476            })
477        }
478    }
479
480    /// Return the last occurrence of one of the needle bytes in the given
481    /// haystack. If no such occurrence exists, then `None` is returned.
482    ///
483    /// The occurrence is reported as an offset into `haystack`. Its maximum
484    /// value is `haystack.len() - 1`.
485    #[inline]
486    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
487        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
488        // falls within the bounds of the start and end pointers.
489        unsafe {
490            generic::search_slice_with_raw(haystack, |s, e| {
491                self.rfind_raw(s, e)
492            })
493        }
494    }
495
496    /// Like `find`, but accepts and returns raw pointers.
497    ///
498    /// When a match is found, the pointer returned is guaranteed to be
499    /// `>= start` and `< end`.
500    ///
501    /// This routine is useful if you're already using raw pointers and would
502    /// like to avoid converting back to a slice before executing a search.
503    ///
504    /// # Safety
505    ///
506    /// * Both `start` and `end` must be valid for reads.
507    /// * Both `start` and `end` must point to an initialized value.
508    /// * Both `start` and `end` must point to the same allocated object and
509    /// must either be in bounds or at most one byte past the end of the
510    /// allocated object.
511    /// * Both `start` and `end` must be _derived from_ a pointer to the same
512    /// object.
513    /// * The distance between `start` and `end` must not overflow `isize`.
514    /// * The distance being in bounds must not rely on "wrapping around" the
515    /// address space.
516    ///
517    /// Note that callers may pass a pair of pointers such that `start >= end`.
518    /// In that case, `None` will always be returned.
519    #[inline]
520    pub unsafe fn find_raw(
521        &self,
522        start: *const u8,
523        end: *const u8,
524    ) -> Option<*const u8> {
525        if start >= end {
526            return None;
527        }
528        if end.distance(start) < __m128i::BYTES {
529            // SAFETY: We require the caller to pass valid start/end pointers.
530            return generic::fwd_byte_by_byte(start, end, |b| {
531                b == self.0.needle1() || b == self.0.needle2()
532            });
533        }
534        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
535        // Also, we've checked that our haystack is big enough to run on the
536        // vector routine. Pointer validity is caller's responsibility.
537        //
538        // Note that we could call `self.0.find_raw` directly here. But that
539        // means we'd have to annotate this routine with `target_feature`.
540        // Which is fine, because this routine is `unsafe` anyway and the
541        // `target_feature` obligation is met by virtue of building a `Two`.
542        // The real problem is that a routine with a `target_feature`
543        // annotation generally can't be inlined into caller code unless the
544        // caller code has the same target feature annotations. Which is maybe
545        // okay for SSE2, but we do the same thing for AVX2 where caller code
546        // probably usually doesn't have AVX2 enabled. That means that this
547        // routine can be inlined which will handle some of the short-haystack
548        // cases above without touching the architecture specific code.
549        self.find_raw_impl(start, end)
550    }
551
552    /// Like `rfind`, but accepts and returns raw pointers.
553    ///
554    /// When a match is found, the pointer returned is guaranteed to be
555    /// `>= start` and `< end`.
556    ///
557    /// This routine is useful if you're already using raw pointers and would
558    /// like to avoid converting back to a slice before executing a search.
559    ///
560    /// # Safety
561    ///
562    /// * Both `start` and `end` must be valid for reads.
563    /// * Both `start` and `end` must point to an initialized value.
564    /// * Both `start` and `end` must point to the same allocated object and
565    /// must either be in bounds or at most one byte past the end of the
566    /// allocated object.
567    /// * Both `start` and `end` must be _derived from_ a pointer to the same
568    /// object.
569    /// * The distance between `start` and `end` must not overflow `isize`.
570    /// * The distance being in bounds must not rely on "wrapping around" the
571    /// address space.
572    ///
573    /// Note that callers may pass a pair of pointers such that `start >= end`.
574    /// In that case, `None` will always be returned.
575    #[inline]
576    pub unsafe fn rfind_raw(
577        &self,
578        start: *const u8,
579        end: *const u8,
580    ) -> Option<*const u8> {
581        if start >= end {
582            return None;
583        }
584        if end.distance(start) < __m128i::BYTES {
585            // SAFETY: We require the caller to pass valid start/end pointers.
586            return generic::rev_byte_by_byte(start, end, |b| {
587                b == self.0.needle1() || b == self.0.needle2()
588            });
589        }
590        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
591        // Also, we've checked that our haystack is big enough to run on the
592        // vector routine. Pointer validity is caller's responsibility.
593        //
594        // See note in forward routine above for why we don't just call
595        // `self.0.rfind_raw` directly here.
596        self.rfind_raw_impl(start, end)
597    }
598
599    /// Execute a search using SSE2 vectors and routines.
600    ///
601    /// # Safety
602    ///
603    /// Same as [`Two::find_raw`], except the distance between `start` and
604    /// `end` must be at least the size of an SSE2 vector (in bytes).
605    ///
606    /// (The target feature safety obligation is automatically fulfilled by
607    /// virtue of being a method on `Two`, which can only be constructed
608    /// when it is safe to call `sse2` routines.)
609    #[target_feature(enable = "sse2")]
610    #[inline]
611    unsafe fn find_raw_impl(
612        &self,
613        start: *const u8,
614        end: *const u8,
615    ) -> Option<*const u8> {
616        self.0.find_raw(start, end)
617    }
618
619    /// Execute a search using SSE2 vectors and routines.
620    ///
621    /// # Safety
622    ///
623    /// Same as [`Two::rfind_raw`], except the distance between `start` and
624    /// `end` must be at least the size of an SSE2 vector (in bytes).
625    ///
626    /// (The target feature safety obligation is automatically fulfilled by
627    /// virtue of being a method on `Two`, which can only be constructed
628    /// when it is safe to call `sse2` routines.)
629    #[target_feature(enable = "sse2")]
630    #[inline]
631    unsafe fn rfind_raw_impl(
632        &self,
633        start: *const u8,
634        end: *const u8,
635    ) -> Option<*const u8> {
636        self.0.rfind_raw(start, end)
637    }
638
639    /// Returns an iterator over all occurrences of the needle bytes in the
640    /// given haystack.
641    ///
642    /// The iterator returned implements `DoubleEndedIterator`. This means it
643    /// can also be used to find occurrences in reverse order.
644    #[inline]
645    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
646        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
647    }
648}
649
650/// An iterator over all occurrences of two possible bytes in a haystack.
651///
652/// This iterator implements `DoubleEndedIterator`, which means it can also be
653/// used to find occurrences in reverse order.
654///
655/// This iterator is created by the [`Two::iter`] method.
656///
657/// The lifetime parameters are as follows:
658///
659/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
660/// * `'h` refers to the lifetime of the haystack being searched.
661#[derive(Clone, Debug)]
662pub struct TwoIter<'a, 'h> {
663    searcher: &'a Two,
664    it: generic::Iter<'h>,
665}
666
667impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
668    type Item = usize;
669
670    #[inline]
671    fn next(&mut self) -> Option<usize> {
672        // SAFETY: We rely on the generic iterator to provide valid start
673        // and end pointers, but we guarantee that any pointer returned by
674        // 'find_raw' falls within the bounds of the start and end pointer.
675        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
676    }
677
678    #[inline]
679    fn size_hint(&self) -> (usize, Option<usize>) {
680        self.it.size_hint()
681    }
682}
683
684impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
685    #[inline]
686    fn next_back(&mut self) -> Option<usize> {
687        // SAFETY: We rely on the generic iterator to provide valid start
688        // and end pointers, but we guarantee that any pointer returned by
689        // 'rfind_raw' falls within the bounds of the start and end pointer.
690        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
691    }
692}
693
694impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
695
696/// Finds all occurrences of three bytes in a haystack.
697///
698/// That is, this reports matches of one of three possible bytes. For example,
699/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
700/// `0`, `2`, `3`, `4` and `5`.
701#[derive(Clone, Copy, Debug)]
702pub struct Three(generic::Three<__m128i>);
703
704impl Three {
705    /// Create a new searcher that finds occurrences of the needle bytes given.
706    ///
707    /// This particular searcher is specialized to use SSE2 vector instructions
708    /// that typically make it quite fast.
709    ///
710    /// If SSE2 is unavailable in the current environment, then `None` is
711    /// returned.
712    #[inline]
713    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
714        if Three::is_available() {
715            // SAFETY: we check that sse2 is available above.
716            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
717        } else {
718            None
719        }
720    }
721
722    /// Create a new finder specific to SSE2 vectors and routines without
723    /// checking that SSE2 is available.
724    ///
725    /// # Safety
726    ///
727    /// Callers must guarantee that it is safe to execute `sse2` instructions
728    /// in the current environment.
729    ///
730    /// Note that it is a common misconception that if one compiles for an
731    /// `x86_64` target, then they therefore automatically have access to SSE2
732    /// instructions. While this is almost always the case, it isn't true in
733    /// 100% of cases.
734    #[target_feature(enable = "sse2")]
735    #[inline]
736    pub unsafe fn new_unchecked(
737        needle1: u8,
738        needle2: u8,
739        needle3: u8,
740    ) -> Three {
741        Three(generic::Three::new(needle1, needle2, needle3))
742    }
743
744    /// Returns true when this implementation is available in the current
745    /// environment.
746    ///
747    /// When this is true, it is guaranteed that [`Three::new`] will return
748    /// a `Some` value. Similarly, when it is false, it is guaranteed that
749    /// `Three::new` will return a `None` value.
750    ///
751    /// Note also that for the lifetime of a single program, if this returns
752    /// true then it will always return true.
753    #[inline]
754    pub fn is_available() -> bool {
755        #[cfg(target_feature = "sse2")]
756        {
757            true
758        }
759        #[cfg(not(target_feature = "sse2"))]
760        {
761            false
762        }
763    }
764
765    /// Return the first occurrence of one of the needle bytes in the given
766    /// haystack. If no such occurrence exists, then `None` is returned.
767    ///
768    /// The occurrence is reported as an offset into `haystack`. Its maximum
769    /// value is `haystack.len() - 1`.
770    #[inline]
771    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
772        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
773        // falls within the bounds of the start and end pointers.
774        unsafe {
775            generic::search_slice_with_raw(haystack, |s, e| {
776                self.find_raw(s, e)
777            })
778        }
779    }
780
781    /// Return the last occurrence of one of the needle bytes in the given
782    /// haystack. If no such occurrence exists, then `None` is returned.
783    ///
784    /// The occurrence is reported as an offset into `haystack`. Its maximum
785    /// value is `haystack.len() - 1`.
786    #[inline]
787    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
788        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
789        // falls within the bounds of the start and end pointers.
790        unsafe {
791            generic::search_slice_with_raw(haystack, |s, e| {
792                self.rfind_raw(s, e)
793            })
794        }
795    }
796
797    /// Like `find`, but accepts and returns raw pointers.
798    ///
799    /// When a match is found, the pointer returned is guaranteed to be
800    /// `>= start` and `< end`.
801    ///
802    /// This routine is useful if you're already using raw pointers and would
803    /// like to avoid converting back to a slice before executing a search.
804    ///
805    /// # Safety
806    ///
807    /// * Both `start` and `end` must be valid for reads.
808    /// * Both `start` and `end` must point to an initialized value.
809    /// * Both `start` and `end` must point to the same allocated object and
810    /// must either be in bounds or at most one byte past the end of the
811    /// allocated object.
812    /// * Both `start` and `end` must be _derived from_ a pointer to the same
813    /// object.
814    /// * The distance between `start` and `end` must not overflow `isize`.
815    /// * The distance being in bounds must not rely on "wrapping around" the
816    /// address space.
817    ///
818    /// Note that callers may pass a pair of pointers such that `start >= end`.
819    /// In that case, `None` will always be returned.
820    #[inline]
821    pub unsafe fn find_raw(
822        &self,
823        start: *const u8,
824        end: *const u8,
825    ) -> Option<*const u8> {
826        if start >= end {
827            return None;
828        }
829        if end.distance(start) < __m128i::BYTES {
830            // SAFETY: We require the caller to pass valid start/end pointers.
831            return generic::fwd_byte_by_byte(start, end, |b| {
832                b == self.0.needle1()
833                    || b == self.0.needle2()
834                    || b == self.0.needle3()
835            });
836        }
837        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
838        // Also, we've checked that our haystack is big enough to run on the
839        // vector routine. Pointer validity is caller's responsibility.
840        //
841        // Note that we could call `self.0.find_raw` directly here. But that
842        // means we'd have to annotate this routine with `target_feature`.
843        // Which is fine, because this routine is `unsafe` anyway and the
844        // `target_feature` obligation is met by virtue of building a `Three`.
845        // The real problem is that a routine with a `target_feature`
846        // annotation generally can't be inlined into caller code unless the
847        // caller code has the same target feature annotations. Which is maybe
848        // okay for SSE2, but we do the same thing for AVX2 where caller code
849        // probably usually doesn't have AVX2 enabled. That means that this
850        // routine can be inlined which will handle some of the short-haystack
851        // cases above without touching the architecture specific code.
852        self.find_raw_impl(start, end)
853    }
854
855    /// Like `rfind`, but accepts and returns raw pointers.
856    ///
857    /// When a match is found, the pointer returned is guaranteed to be
858    /// `>= start` and `< end`.
859    ///
860    /// This routine is useful if you're already using raw pointers and would
861    /// like to avoid converting back to a slice before executing a search.
862    ///
863    /// # Safety
864    ///
865    /// * Both `start` and `end` must be valid for reads.
866    /// * Both `start` and `end` must point to an initialized value.
867    /// * Both `start` and `end` must point to the same allocated object and
868    /// must either be in bounds or at most one byte past the end of the
869    /// allocated object.
870    /// * Both `start` and `end` must be _derived from_ a pointer to the same
871    /// object.
872    /// * The distance between `start` and `end` must not overflow `isize`.
873    /// * The distance being in bounds must not rely on "wrapping around" the
874    /// address space.
875    ///
876    /// Note that callers may pass a pair of pointers such that `start >= end`.
877    /// In that case, `None` will always be returned.
878    #[inline]
879    pub unsafe fn rfind_raw(
880        &self,
881        start: *const u8,
882        end: *const u8,
883    ) -> Option<*const u8> {
884        if start >= end {
885            return None;
886        }
887        if end.distance(start) < __m128i::BYTES {
888            // SAFETY: We require the caller to pass valid start/end pointers.
889            return generic::rev_byte_by_byte(start, end, |b| {
890                b == self.0.needle1()
891                    || b == self.0.needle2()
892                    || b == self.0.needle3()
893            });
894        }
895        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
896        // Also, we've checked that our haystack is big enough to run on the
897        // vector routine. Pointer validity is caller's responsibility.
898        //
899        // See note in forward routine above for why we don't just call
900        // `self.0.rfind_raw` directly here.
901        self.rfind_raw_impl(start, end)
902    }
903
904    /// Execute a search using SSE2 vectors and routines.
905    ///
906    /// # Safety
907    ///
908    /// Same as [`Three::find_raw`], except the distance between `start` and
909    /// `end` must be at least the size of an SSE2 vector (in bytes).
910    ///
911    /// (The target feature safety obligation is automatically fulfilled by
912    /// virtue of being a method on `Three`, which can only be constructed
913    /// when it is safe to call `sse2` routines.)
914    #[target_feature(enable = "sse2")]
915    #[inline]
916    unsafe fn find_raw_impl(
917        &self,
918        start: *const u8,
919        end: *const u8,
920    ) -> Option<*const u8> {
921        self.0.find_raw(start, end)
922    }
923
924    /// Execute a search using SSE2 vectors and routines.
925    ///
926    /// # Safety
927    ///
928    /// Same as [`Three::rfind_raw`], except the distance between `start` and
929    /// `end` must be at least the size of an SSE2 vector (in bytes).
930    ///
931    /// (The target feature safety obligation is automatically fulfilled by
932    /// virtue of being a method on `Three`, which can only be constructed
933    /// when it is safe to call `sse2` routines.)
934    #[target_feature(enable = "sse2")]
935    #[inline]
936    unsafe fn rfind_raw_impl(
937        &self,
938        start: *const u8,
939        end: *const u8,
940    ) -> Option<*const u8> {
941        self.0.rfind_raw(start, end)
942    }
943
944    /// Returns an iterator over all occurrences of the needle byte in the
945    /// given haystack.
946    ///
947    /// The iterator returned implements `DoubleEndedIterator`. This means it
948    /// can also be used to find occurrences in reverse order.
949    #[inline]
950    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
951        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
952    }
953}
954
955/// An iterator over all occurrences of three possible bytes in a haystack.
956///
957/// This iterator implements `DoubleEndedIterator`, which means it can also be
958/// used to find occurrences in reverse order.
959///
960/// This iterator is created by the [`Three::iter`] method.
961///
962/// The lifetime parameters are as follows:
963///
964/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
965/// * `'h` refers to the lifetime of the haystack being searched.
966#[derive(Clone, Debug)]
967pub struct ThreeIter<'a, 'h> {
968    searcher: &'a Three,
969    it: generic::Iter<'h>,
970}
971
972impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
973    type Item = usize;
974
975    #[inline]
976    fn next(&mut self) -> Option<usize> {
977        // SAFETY: We rely on the generic iterator to provide valid start
978        // and end pointers, but we guarantee that any pointer returned by
979        // 'find_raw' falls within the bounds of the start and end pointer.
980        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
981    }
982
983    #[inline]
984    fn size_hint(&self) -> (usize, Option<usize>) {
985        self.it.size_hint()
986    }
987}
988
989impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
990    #[inline]
991    fn next_back(&mut self) -> Option<usize> {
992        // SAFETY: We rely on the generic iterator to provide valid start
993        // and end pointers, but we guarantee that any pointer returned by
994        // 'rfind_raw' falls within the bounds of the start and end pointer.
995        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
996    }
997}
998
999impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
1000
1001#[cfg(test)]
1002mod tests {
1003    use super::*;
1004
1005    define_memchr_quickcheck!(super);
1006
1007    #[test]
1008    fn forward_one() {
1009        crate::tests::memchr::Runner::new(1).forward_iter(
1010            |haystack, needles| {
1011                Some(One::new(needles[0])?.iter(haystack).collect())
1012            },
1013        )
1014    }
1015
1016    #[test]
1017    fn reverse_one() {
1018        crate::tests::memchr::Runner::new(1).reverse_iter(
1019            |haystack, needles| {
1020                Some(One::new(needles[0])?.iter(haystack).rev().collect())
1021            },
1022        )
1023    }
1024
1025    #[test]
1026    fn count_one() {
1027        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
1028            Some(One::new(needles[0])?.iter(haystack).count())
1029        })
1030    }
1031
1032    #[test]
1033    fn forward_two() {
1034        crate::tests::memchr::Runner::new(2).forward_iter(
1035            |haystack, needles| {
1036                let n1 = needles.get(0).copied()?;
1037                let n2 = needles.get(1).copied()?;
1038                Some(Two::new(n1, n2)?.iter(haystack).collect())
1039            },
1040        )
1041    }
1042
1043    #[test]
1044    fn reverse_two() {
1045        crate::tests::memchr::Runner::new(2).reverse_iter(
1046            |haystack, needles| {
1047                let n1 = needles.get(0).copied()?;
1048                let n2 = needles.get(1).copied()?;
1049                Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
1050            },
1051        )
1052    }
1053
1054    #[test]
1055    fn forward_three() {
1056        crate::tests::memchr::Runner::new(3).forward_iter(
1057            |haystack, needles| {
1058                let n1 = needles.get(0).copied()?;
1059                let n2 = needles.get(1).copied()?;
1060                let n3 = needles.get(2).copied()?;
1061                Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
1062            },
1063        )
1064    }
1065
1066    #[test]
1067    fn reverse_three() {
1068        crate::tests::memchr::Runner::new(3).reverse_iter(
1069            |haystack, needles| {
1070                let n1 = needles.get(0).copied()?;
1071                let n2 = needles.get(1).copied()?;
1072                let n3 = needles.get(2).copied()?;
1073                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
1074            },
1075        )
1076    }
1077}