memchr/arch/x86_64/
memchr.rs

1/*!
2Wrapper routines for `memchr` and friends.
3
4These routines efficiently dispatch to the best implementation based on what
5the CPU supports.
6*/
7
8/// Provides a way to run a memchr-like function while amortizing the cost of
9/// runtime CPU feature detection.
10///
11/// This works by loading a function pointer from an atomic global. Initially,
12/// this global is set to a function that does CPU feature detection. For
13/// example, if AVX2 is enabled, then the AVX2 implementation is used.
14/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
15/// in some niche cases, if SSE2 isn't available, then the architecture
16/// independent fallback implementation is used.)
17///
18/// After the first call to this function, the atomic global is replaced with
19/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
20/// will directly call the chosen routine instead of needing to go through the
21/// CPU feature detection branching again.
22///
23/// This particular macro is specifically written to provide the implementation
24/// of functions with the following signature:
25///
26/// ```ignore
27/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
28/// ```
29///
30/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
31/// `needle3`, respectively. The `start` and `end` parameters correspond to the
32/// start and end of the haystack, respectively.
33///
34/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
35/// that the function is compatible with our lower level iterator logic that
36/// operates on raw pointers. We use this macro to implement "raw" memchr
37/// routines with the signature above, and then define memchr routines using
38/// regular slices on top of them.
39///
40/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
41/// it shouldn't be strictly necessary because without it, it seems to
42/// cause the compiler to blow up. I guess it can't handle a function
43/// pointer being created with a sse target feature? Dunno. See the
44/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with
45/// this.
46///
47/// # Safety
48///
49/// Primarily callers must that `$fnty` is a correct function pointer type and
50/// not something else.
51///
52/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
53/// routine that returns a valid function pointer when a match is found. That
54/// is, a pointer that is `>= start` and `< end`.
55///
56/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
57/// correspond to valid pointers.
58macro_rules! unsafe_ifunc {
59    (
60        $memchrty:ident,
61        $memchrfind:ident,
62        $fnty:ty,
63        $retty:ty,
64        $hay_start:ident,
65        $hay_end:ident,
66        $($needle:ident),+
67    ) => {{
68        #![allow(unused_unsafe)]
69
70        use core::sync::atomic::{AtomicPtr, Ordering};
71
72        type Fn = *mut ();
73        type RealFn = $fnty;
74        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
75
76        #[cfg(target_feature = "sse2")]
77        #[target_feature(enable = "sse2", enable = "avx2")]
78        unsafe fn find_avx2(
79            $($needle: u8),+,
80            $hay_start: *const u8,
81            $hay_end: *const u8,
82        ) -> $retty {
83            use crate::arch::x86_64::avx2::memchr::$memchrty;
84            $memchrty::new_unchecked($($needle),+)
85                .$memchrfind($hay_start, $hay_end)
86        }
87
88        #[cfg(target_feature = "sse2")]
89        #[target_feature(enable = "sse2")]
90        unsafe fn find_sse2(
91            $($needle: u8),+,
92            $hay_start: *const u8,
93            $hay_end: *const u8,
94        ) -> $retty {
95            use crate::arch::x86_64::sse2::memchr::$memchrty;
96            $memchrty::new_unchecked($($needle),+)
97                .$memchrfind($hay_start, $hay_end)
98        }
99
100        unsafe fn find_fallback(
101            $($needle: u8),+,
102            $hay_start: *const u8,
103            $hay_end: *const u8,
104        ) -> $retty {
105            use crate::arch::all::memchr::$memchrty;
106            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
107        }
108
109        unsafe fn detect(
110            $($needle: u8),+,
111            $hay_start: *const u8,
112            $hay_end: *const u8,
113        ) -> $retty {
114            let fun = {
115                #[cfg(not(target_feature = "sse2"))]
116                {
117                    debug!(
118                        "no sse2 feature available, using fallback for {}",
119                        stringify!($memchrty),
120                    );
121                    find_fallback as RealFn
122                }
123                #[cfg(target_feature = "sse2")]
124                {
125                    use crate::arch::x86_64::{sse2, avx2};
126                    if avx2::memchr::$memchrty::is_available() {
127                        debug!("chose AVX2 for {}", stringify!($memchrty));
128                        find_avx2 as RealFn
129                    } else if sse2::memchr::$memchrty::is_available() {
130                        debug!("chose SSE2 for {}", stringify!($memchrty));
131                        find_sse2 as RealFn
132                    } else {
133                        debug!("chose fallback for {}", stringify!($memchrty));
134                        find_fallback as RealFn
135                    }
136                }
137            };
138            FN.store(fun as Fn, Ordering::Relaxed);
139            // SAFETY: The only thing we need to uphold here is the
140            // `#[target_feature]` requirements. Since we check is_available
141            // above before using the corresponding implementation, we are
142            // guaranteed to only call code that is supported on the current
143            // CPU.
144            fun($($needle),+, $hay_start, $hay_end)
145        }
146
147        // SAFETY: By virtue of the caller contract, RealFn is a function
148        // pointer, which is always safe to transmute with a *mut (). Also,
149        // since we use $memchrty::is_available, it is guaranteed to be safe
150        // to call $memchrty::$memchrfind.
151        unsafe {
152            let fun = FN.load(Ordering::Relaxed);
153            core::mem::transmute::<Fn, RealFn>(fun)(
154                $($needle),+,
155                $hay_start,
156                $hay_end,
157            )
158        }
159    }};
160}
161
162// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
163// what's available in the current environment. The secret sauce here is that
164// we only check for which one to use approximately once, and then "cache" that
165// choice into a global function pointer. Subsequent invocations then just call
166// the appropriate function directly.
167
168/// memchr, but using raw pointers to represent the haystack.
169///
170/// # Safety
171///
172/// Pointers must be valid. See `One::find_raw`.
173#[inline(always)]
174pub(crate) fn memchr_raw(
175    n1: u8,
176    start: *const u8,
177    end: *const u8,
178) -> Option<*const u8> {
179    // SAFETY: We provide a valid function pointer type.
180    unsafe_ifunc!(
181        One,
182        find_raw,
183        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
184        Option<*const u8>,
185        start,
186        end,
187        n1
188    )
189}
190
191/// memrchr, but using raw pointers to represent the haystack.
192///
193/// # Safety
194///
195/// Pointers must be valid. See `One::rfind_raw`.
196#[inline(always)]
197pub(crate) fn memrchr_raw(
198    n1: u8,
199    start: *const u8,
200    end: *const u8,
201) -> Option<*const u8> {
202    // SAFETY: We provide a valid function pointer type.
203    unsafe_ifunc!(
204        One,
205        rfind_raw,
206        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
207        Option<*const u8>,
208        start,
209        end,
210        n1
211    )
212}
213
214/// memchr2, but using raw pointers to represent the haystack.
215///
216/// # Safety
217///
218/// Pointers must be valid. See `Two::find_raw`.
219#[inline(always)]
220pub(crate) fn memchr2_raw(
221    n1: u8,
222    n2: u8,
223    start: *const u8,
224    end: *const u8,
225) -> Option<*const u8> {
226    // SAFETY: We provide a valid function pointer type.
227    unsafe_ifunc!(
228        Two,
229        find_raw,
230        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
231        Option<*const u8>,
232        start,
233        end,
234        n1,
235        n2
236    )
237}
238
239/// memrchr2, but using raw pointers to represent the haystack.
240///
241/// # Safety
242///
243/// Pointers must be valid. See `Two::rfind_raw`.
244#[inline(always)]
245pub(crate) fn memrchr2_raw(
246    n1: u8,
247    n2: u8,
248    start: *const u8,
249    end: *const u8,
250) -> Option<*const u8> {
251    // SAFETY: We provide a valid function pointer type.
252    unsafe_ifunc!(
253        Two,
254        rfind_raw,
255        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
256        Option<*const u8>,
257        start,
258        end,
259        n1,
260        n2
261    )
262}
263
264/// memchr3, but using raw pointers to represent the haystack.
265///
266/// # Safety
267///
268/// Pointers must be valid. See `Three::find_raw`.
269#[inline(always)]
270pub(crate) fn memchr3_raw(
271    n1: u8,
272    n2: u8,
273    n3: u8,
274    start: *const u8,
275    end: *const u8,
276) -> Option<*const u8> {
277    // SAFETY: We provide a valid function pointer type.
278    unsafe_ifunc!(
279        Three,
280        find_raw,
281        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
282        Option<*const u8>,
283        start,
284        end,
285        n1,
286        n2,
287        n3
288    )
289}
290
291/// memrchr3, but using raw pointers to represent the haystack.
292///
293/// # Safety
294///
295/// Pointers must be valid. See `Three::rfind_raw`.
296#[inline(always)]
297pub(crate) fn memrchr3_raw(
298    n1: u8,
299    n2: u8,
300    n3: u8,
301    start: *const u8,
302    end: *const u8,
303) -> Option<*const u8> {
304    // SAFETY: We provide a valid function pointer type.
305    unsafe_ifunc!(
306        Three,
307        rfind_raw,
308        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
309        Option<*const u8>,
310        start,
311        end,
312        n1,
313        n2,
314        n3
315    )
316}
317
318/// Count all matching bytes, but using raw pointers to represent the haystack.
319///
320/// # Safety
321///
322/// Pointers must be valid. See `One::count_raw`.
323#[inline(always)]
324pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize {
325    // SAFETY: We provide a valid function pointer type.
326    unsafe_ifunc!(
327        One,
328        count_raw,
329        unsafe fn(u8, *const u8, *const u8) -> usize,
330        usize,
331        start,
332        end,
333        n1
334    )
335}