use core::{
    fmt::Debug,
    panic::{RefUnwindSafe, UnwindSafe},
};

/// A trait for describing vector operations used by vectorized searchers.
///
/// The trait is common to all vector sizes and architectures, so that
/// search implementations can be written generically. Every method is
/// `unsafe`, since each implementation is tied to a CPU feature (SSSE3,
/// AVX2 or NEON below) that the caller must ensure is enabled before use.
pub(crate) trait Vector:
    Copy + Debug + Send + Sync + UnwindSafe + RefUnwindSafe
{
    /// The number of bits in the vector.
    const BITS: usize;
    /// The number of bytes in the vector, i.e., the number of 8-bit lanes.
    const BYTES: usize;

    /// Create a vector with the given byte repeated into each 8-bit lane.
    unsafe fn splat(byte: u8) -> Self;

    /// Read a vector-size number of bytes from the given pointer. The
    /// pointer does not need to be aligned.
    unsafe fn load_unaligned(data: *const u8) -> Self;

    /// Returns true if and only if this vector has zero in all of its
    /// lanes.
    unsafe fn is_zero(self) -> bool;

    /// Do an 8-bit pairwise equality check. If lane `i` is equal in this
    /// vector and the one given, then lane `i` in the result is `0xFF`;
    /// otherwise it is `0x00`.
    unsafe fn cmpeq(self, vector2: Self) -> Self;

    /// Return the bitwise AND of this vector and `vector2`.
    unsafe fn and(self, vector2: Self) -> Self;

    /// Return the bitwise OR of this vector and `vector2`.
    #[allow(dead_code)]
    unsafe fn or(self, vector2: Self) -> Self;

    /// Shift each 8-bit lane in this vector to the right by the number of
    /// bits given by the `BITS` const parameter.
    unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self;

    /// Shift this vector to the left by one byte and shift the most
    /// significant byte of `vector2` into the least significant position.
    unsafe fn shift_in_one_byte(self, vector2: Self) -> Self;

    /// Shift this vector to the left by two bytes and shift the two most
    /// significant bytes of `vector2` into the least significant
    /// positions.
    unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self;

    /// Shift this vector to the left by three bytes and shift the three
    /// most significant bytes of `vector2` into the least significant
    /// positions.
    unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self;

    /// Shuffle the 8-bit lanes of this vector, using the corresponding
    /// lanes of `indices` as indices into this vector.
    unsafe fn shuffle_bytes(self, indices: Self) -> Self;

    /// Call `f` for each 64-bit lane of this vector, passing the lane
    /// index and the lane value as a `u64`. If `f` returns `Some`,
    /// iteration stops and that value is returned.
    unsafe fn for_each_64bit_lane<T>(
        self,
        f: impl FnMut(usize, u64) -> Option<T>,
    ) -> Option<T>;
}

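// An illustrative sketch, not part of the original code: one way a caller
// can combine `splat`, `cmpeq` and `for_each_64bit_lane` to find the
// offset of the first occurrence of `byte` within a single vector-sized
// chunk. The `first_match_offset` name is mine, and the safety contract
// is assumed: `chunk` must point at at least `V::BYTES` readable bytes,
// and the CPU features required by `V` must be enabled.
#[allow(dead_code)]
#[inline(always)]
unsafe fn first_match_offset<V: Vector>(
    chunk: *const u8,
    byte: u8,
) -> Option<usize> {
    let eq = V::load_unaligned(chunk).cmpeq(V::splat(byte));
    eq.for_each_64bit_lane(|i, lane| {
        if lane == 0 {
            None
        } else {
            // Matching lanes are 0xFF, so on these little-endian targets
            // the trailing zero count divided by 8 is the byte offset of
            // the first match within this 64-bit lane.
            Some(i * 8 + (lane.trailing_zeros() as usize) / 8)
        }
    })
}
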
/// A trait for vectors that can also be viewed as a pair of independent
/// half-size vectors, with operations that apply to each half separately.
/// This supports "fat" searchers that process the same half-size haystack
/// chunk in both halves at once.
pub(crate) trait FatVector: Vector {
    type Half: Vector;

    /// Read a half-vector-size number of bytes from the given pointer and
    /// broadcast it to both halves of this vector. The pointer does not
    /// need to be aligned.
    unsafe fn load_half_unaligned(data: *const u8) -> Self;

    /// Like `Vector::shift_in_one_byte`, but applied to each half
    /// independently.
    unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self;

    /// Like `Vector::shift_in_two_bytes`, but applied to each half
    /// independently.
    unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self;

    /// Like `Vector::shift_in_three_bytes`, but applied to each half
    /// independently.
    unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self;

    /// Swap the two halves of this vector.
    unsafe fn swap_halves(self) -> Self;

    /// Interleave the 8-bit lanes in the low portion of each half of this
    /// vector with the corresponding lanes of `vector2`.
    unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self;

    /// Interleave the 8-bit lanes in the high portion of each half of this
    /// vector with the corresponding lanes of `vector2`.
    unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self;

    /// Call `f` for each 64-bit lane in the low half of this vector and
    /// then in the low half of `vector2`, passing a running lane index and
    /// the lane value as a `u64`. If `f` returns `Some`, iteration stops
    /// and that value is returned.
    unsafe fn for_each_low_64bit_lane<T>(
        self,
        vector2: Self,
        f: impl FnMut(usize, u64) -> Option<T>,
    ) -> Option<T>;
}

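// A sketch of how the "fat" methods are meant to compose (my own
// illustration based on the API shape and the tests below, not code from
// the original source): the same 16 haystack bytes are broadcast to both
// halves, and the `half_shift_in_*` methods then shift bytes from a
// previous chunk into each half independently.
#[allow(dead_code)]
#[inline(always)]
unsafe fn fat_windows<V: FatVector>(prev: V, cur: *const u8) -> (V, V) {
    // `chunk` holds the current half-size chunk in both halves.
    let chunk = V::load_half_unaligned(cur);
    // The second value is the same window with the final byte of each half
    // of `prev` pulled into the least significant lane of each half,
    // mirroring what `Vector::shift_in_one_byte` does for a whole vector.
    (chunk, chunk.half_shift_in_one_byte(prev))
}
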
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
mod x86_64_ssse3 {
    use core::arch::x86_64::*;

    use crate::util::int::{I32, I8};

    use super::Vector;

    impl Vector for __m128i {
        const BITS: usize = 128;
        const BYTES: usize = 16;

        #[inline(always)]
        unsafe fn splat(byte: u8) -> __m128i {
            _mm_set1_epi8(i8::from_bits(byte))
        }

        #[inline(always)]
        unsafe fn load_unaligned(data: *const u8) -> __m128i {
            _mm_loadu_si128(data.cast::<__m128i>())
        }

        #[inline(always)]
        unsafe fn is_zero(self) -> bool {
            // Each lane of `cmp` is 0xFF precisely when the corresponding
            // byte of `self` is zero, so the 16-bit movemask is all ones
            // if and only if every byte is zero.
            let cmp = self.cmpeq(Self::splat(0));
            _mm_movemask_epi8(cmp).to_bits() == 0xFFFF
        }

        #[inline(always)]
        unsafe fn cmpeq(self, vector2: Self) -> __m128i {
            _mm_cmpeq_epi8(self, vector2)
        }

        #[inline(always)]
        unsafe fn and(self, vector2: Self) -> __m128i {
            _mm_and_si128(self, vector2)
        }

        #[inline(always)]
        unsafe fn or(self, vector2: Self) -> __m128i {
            _mm_or_si128(self, vector2)
        }

        #[inline(always)]
        unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
            // There is no _mm_srli_epi8, so we emulate it by shifting
            // 16-bit lanes and masking each byte down to its low nibble.
            // The mask discards bits shifted in from the neighboring byte
            // (and assumes the shifted result fits in a nibble, as it
            // does for the nibble extraction this is used for).
            let lomask = Self::splat(0xF);
            _mm_srli_epi16(self, BITS).and(lomask)
        }

        #[inline(always)]
        unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
            _mm_alignr_epi8(self, vector2, 15)
        }

        #[inline(always)]
        unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
            _mm_alignr_epi8(self, vector2, 14)
        }

        #[inline(always)]
        unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
            _mm_alignr_epi8(self, vector2, 13)
        }

        #[inline(always)]
        unsafe fn shuffle_bytes(self, indices: Self) -> Self {
            _mm_shuffle_epi8(self, indices)
        }

        #[inline(always)]
        unsafe fn for_each_64bit_lane<T>(
            self,
            mut f: impl FnMut(usize, u64) -> Option<T>,
        ) -> Option<T> {
            // SAFETY: __m128i and [u64; 2] have the same size, and x86 is
            // little endian, so lane 0 is the low 64 bits.
            let lanes: [u64; 2] = core::mem::transmute(self);
            if let Some(t) = f(0, lanes[0]) {
                return Some(t);
            }
            if let Some(t) = f(1, lanes[1]) {
                return Some(t);
            }
            None
        }
    }
}

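// A sketch of the Teddy-style nibble lookup that primitives like
// `shuffle_bytes` and `shift_8bit_lane_right` exist to support. This is my
// reconstruction for illustration only; the function name and shape are
// assumptions, not the crate's actual searcher code. The idea: a vector
// doubles as a 16-entry lookup table, so splitting each haystack byte into
// its two nibbles allows two table lookups whose intersection approximates
// a byte-class membership test. (On 256-bit vectors the shuffle operates
// per 128-bit half, so each table must be duplicated across halves, e.g.
// via `FatVector::load_half_unaligned`.)
#[allow(dead_code)]
#[inline(always)]
unsafe fn nibble_class_match<V: Vector>(
    chunk: V,
    lo_table: V,
    hi_table: V,
) -> V {
    let lo_nibbles = chunk.and(V::splat(0xF));
    let hi_nibbles = chunk.shift_8bit_lane_right::<4>();
    lo_table
        .shuffle_bytes(lo_nibbles)
        .and(hi_table.shuffle_bytes(hi_nibbles))
}
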
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
mod x86_64_avx2 {
    use core::arch::x86_64::*;

    use crate::util::int::{I32, I64, I8};

    use super::{FatVector, Vector};

    impl Vector for __m256i {
        const BITS: usize = 256;
        const BYTES: usize = 32;

        #[inline(always)]
        unsafe fn splat(byte: u8) -> __m256i {
            _mm256_set1_epi8(i8::from_bits(byte))
        }

        #[inline(always)]
        unsafe fn load_unaligned(data: *const u8) -> __m256i {
            _mm256_loadu_si256(data.cast::<__m256i>())
        }

        #[inline(always)]
        unsafe fn is_zero(self) -> bool {
            let cmp = self.cmpeq(Self::splat(0));
            _mm256_movemask_epi8(cmp).to_bits() == 0xFFFFFFFF
        }

        #[inline(always)]
        unsafe fn cmpeq(self, vector2: Self) -> __m256i {
            _mm256_cmpeq_epi8(self, vector2)
        }

        #[inline(always)]
        unsafe fn and(self, vector2: Self) -> __m256i {
            _mm256_and_si256(self, vector2)
        }

        #[inline(always)]
        unsafe fn or(self, vector2: Self) -> __m256i {
            _mm256_or_si256(self, vector2)
        }

        #[inline(always)]
        unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
            // As in the 128-bit impl: emulate the missing 8-bit shift by
            // shifting 16-bit lanes and keeping each byte's low nibble.
            let lomask = Self::splat(0xF);
            _mm256_srli_epi16(self, BITS).and(lomask)
        }

        #[inline(always)]
        unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
            // _mm256_alignr_epi8 operates on each 128-bit half
            // independently, which is not what we want here. So we first
            // build a vector whose halves are [high half of vector2, low
            // half of self], so that each half of the alignr below pulls
            // in the byte that logically precedes it.
            let v = _mm256_permute2x128_si256(vector2, self, 0x21);
            _mm256_alignr_epi8(self, v, 15)
        }

        #[inline(always)]
        unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
            // See shift_in_one_byte for why the permute is needed.
            let v = _mm256_permute2x128_si256(vector2, self, 0x21);
            _mm256_alignr_epi8(self, v, 14)
        }

        #[inline(always)]
        unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
            // See shift_in_one_byte for why the permute is needed.
            let v = _mm256_permute2x128_si256(vector2, self, 0x21);
            _mm256_alignr_epi8(self, v, 13)
        }

        #[inline(always)]
        unsafe fn shuffle_bytes(self, indices: Self) -> Self {
            _mm256_shuffle_epi8(self, indices)
        }

        #[inline(always)]
        unsafe fn for_each_64bit_lane<T>(
            self,
            mut f: impl FnMut(usize, u64) -> Option<T>,
        ) -> Option<T> {
            let lane = _mm256_extract_epi64(self, 0).to_bits();
            if let Some(t) = f(0, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(self, 1).to_bits();
            if let Some(t) = f(1, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(self, 2).to_bits();
            if let Some(t) = f(2, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(self, 3).to_bits();
            if let Some(t) = f(3, lane) {
                return Some(t);
            }
            None
        }
    }

    impl FatVector for __m256i {
        type Half = __m128i;

        #[inline(always)]
        unsafe fn load_half_unaligned(data: *const u8) -> Self {
            let half = Self::Half::load_unaligned(data);
            _mm256_broadcastsi128_si256(half)
        }

        #[inline(always)]
        unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self {
            // Here the per-128-bit-half behavior of _mm256_alignr_epi8 is
            // exactly the "each half independently" semantics we want, so
            // no permute is needed.
            _mm256_alignr_epi8(self, vector2, 15)
        }

        #[inline(always)]
        unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self {
            _mm256_alignr_epi8(self, vector2, 14)
        }

        #[inline(always)]
        unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self {
            _mm256_alignr_epi8(self, vector2, 13)
        }

        #[inline(always)]
        unsafe fn swap_halves(self) -> Self {
            // 0x4E encodes the permutation [2, 3, 0, 1] of 64-bit lanes,
            // which swaps the two 128-bit halves.
            _mm256_permute4x64_epi64(self, 0x4E)
        }

        #[inline(always)]
        unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self {
            _mm256_unpacklo_epi8(self, vector2)
        }

        #[inline(always)]
        unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self {
            _mm256_unpackhi_epi8(self, vector2)
        }

        #[inline(always)]
        unsafe fn for_each_low_64bit_lane<T>(
            self,
            vector2: Self,
            mut f: impl FnMut(usize, u64) -> Option<T>,
        ) -> Option<T> {
            let lane = _mm256_extract_epi64(self, 0).to_bits();
            if let Some(t) = f(0, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(self, 1).to_bits();
            if let Some(t) = f(1, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(vector2, 0).to_bits();
            if let Some(t) = f(2, lane) {
                return Some(t);
            }
            let lane = _mm256_extract_epi64(vector2, 1).to_bits();
            if let Some(t) = f(3, lane) {
                return Some(t);
            }
            None
        }
    }
}

#[cfg(all(
    target_arch = "aarch64",
    target_feature = "neon",
    target_endian = "little"
))]
mod aarch64_neon {
    use core::arch::aarch64::*;

    use super::Vector;

    impl Vector for uint8x16_t {
        const BITS: usize = 128;
        const BYTES: usize = 16;

        #[inline(always)]
        unsafe fn splat(byte: u8) -> uint8x16_t {
            vdupq_n_u8(byte)
        }

        #[inline(always)]
        unsafe fn load_unaligned(data: *const u8) -> uint8x16_t {
            vld1q_u8(data)
        }

        #[inline(always)]
        unsafe fn is_zero(self) -> bool {
            // Each of the low 8 lanes of vpmaxq_u8(self, self) is the max
            // of an adjacent pair of lanes of `self`, so the low 64 bits
            // of the result cover all 16 lanes. The vector is therefore
            // zero if and only if that u64 is zero. (The example tests
            // below demonstrate this, along with vmaxvq_u8's full
            // horizontal max.)
            let maxes = vreinterpretq_u64_u8(vpmaxq_u8(self, self));
            vgetq_lane_u64(maxes, 0) == 0
        }

        #[inline(always)]
        unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t {
            vceqq_u8(self, vector2)
        }

        #[inline(always)]
        unsafe fn and(self, vector2: Self) -> uint8x16_t {
            vandq_u8(self, vector2)
        }

        #[inline(always)]
        unsafe fn or(self, vector2: Self) -> uint8x16_t {
            vorrq_u8(self, vector2)
        }

        #[inline(always)]
        unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
            debug_assert!(BITS <= 7);
            vshrq_n_u8(self, BITS)
        }

        #[inline(always)]
        unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
            vextq_u8(vector2, self, 15)
        }

        #[inline(always)]
        unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
            vextq_u8(vector2, self, 14)
        }

        #[inline(always)]
        unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
            vextq_u8(vector2, self, 13)
        }

        #[inline(always)]
        unsafe fn shuffle_bytes(self, indices: Self) -> Self {
            vqtbl1q_u8(self, indices)
        }

        #[inline(always)]
        unsafe fn for_each_64bit_lane<T>(
            self,
            mut f: impl FnMut(usize, u64) -> Option<T>,
        ) -> Option<T> {
            let this = vreinterpretq_u64_u8(self);
            let lane = vgetq_lane_u64(this, 0);
            if let Some(t) = f(0, lane) {
                return Some(t);
            }
            let lane = vgetq_lane_u64(this, 1);
            if let Some(t) = f(1, lane) {
                return Some(t);
            }
            None
        }
    }
}

#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))]
mod tests_x86_64_ssse3 {
    use core::arch::x86_64::*;

    use crate::util::int::{I32, U32};

    use super::*;

    fn is_runnable() -> bool {
        std::is_x86_feature_detected!("ssse3")
    }

    #[target_feature(enable = "ssse3")]
    unsafe fn load(lanes: [u8; 16]) -> __m128i {
        __m128i::load_unaligned(&lanes as *const u8)
    }

    #[target_feature(enable = "ssse3")]
    unsafe fn unload(v: __m128i) -> [u8; 16] {
        [
            _mm_extract_epi8(v, 0).to_bits().low_u8(),
            _mm_extract_epi8(v, 1).to_bits().low_u8(),
            _mm_extract_epi8(v, 2).to_bits().low_u8(),
            _mm_extract_epi8(v, 3).to_bits().low_u8(),
            _mm_extract_epi8(v, 4).to_bits().low_u8(),
            _mm_extract_epi8(v, 5).to_bits().low_u8(),
            _mm_extract_epi8(v, 6).to_bits().low_u8(),
            _mm_extract_epi8(v, 7).to_bits().low_u8(),
            _mm_extract_epi8(v, 8).to_bits().low_u8(),
            _mm_extract_epi8(v, 9).to_bits().low_u8(),
            _mm_extract_epi8(v, 10).to_bits().low_u8(),
            _mm_extract_epi8(v, 11).to_bits().low_u8(),
            _mm_extract_epi8(v, 12).to_bits().low_u8(),
            _mm_extract_epi8(v, 13).to_bits().low_u8(),
            _mm_extract_epi8(v, 14).to_bits().low_u8(),
            _mm_extract_epi8(v, 15).to_bits().low_u8(),
        ]
    }

    #[test]
    fn vector_splat() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v = __m128i::splat(0xAF);
            assert_eq!(
                unload(v),
                [
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_is_zero() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert!(!v.is_zero());
            let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert!(v.is_zero());
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_cmpeq() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]);
            let v2 =
                load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]);
            assert_eq!(
                unload(v1.cmpeq(v2)),
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_and() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            let v2 =
                load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(
                unload(v1.and(v2)),
                [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_or() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            let v2 =
                load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(
                unload(v1.or(v2)),
                [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_8bit_lane_right() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v = load([
                0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert_eq!(
                unload(v.shift_8bit_lane_right::<2>()),
                [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_one_byte() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_one_byte(v2)),
                [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_two_bytes() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_two_bytes(v2)),
                [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_three_bytes() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_three_bytes(v2)),
                [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shuffle_bytes() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 =
                load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]);
            assert_eq!(
                unload(v1.shuffle_bytes(v2)),
                [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_for_each_64bit_lane() {
        #[target_feature(enable = "ssse3")]
        unsafe fn test() {
            let v = load([
                0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
                0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
            ]);
            let mut lanes = [0u64; 2];
            v.for_each_64bit_lane(|i, lane| {
                lanes[i] = lane;
                None::<()>
            });
            assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],);
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }
}

#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))]
mod tests_x86_64_avx2 {
    use core::arch::x86_64::*;

    use crate::util::int::{I32, U32};

    use super::*;

    fn is_runnable() -> bool {
        std::is_x86_feature_detected!("avx2")
    }

    #[target_feature(enable = "avx2")]
    unsafe fn load(lanes: [u8; 32]) -> __m256i {
        __m256i::load_unaligned(&lanes as *const u8)
    }

    #[target_feature(enable = "avx2")]
    unsafe fn load_half(lanes: [u8; 16]) -> __m256i {
        __m256i::load_half_unaligned(&lanes as *const u8)
    }

    #[target_feature(enable = "avx2")]
    unsafe fn unload(v: __m256i) -> [u8; 32] {
        [
            _mm256_extract_epi8(v, 0).to_bits().low_u8(),
            _mm256_extract_epi8(v, 1).to_bits().low_u8(),
            _mm256_extract_epi8(v, 2).to_bits().low_u8(),
            _mm256_extract_epi8(v, 3).to_bits().low_u8(),
            _mm256_extract_epi8(v, 4).to_bits().low_u8(),
            _mm256_extract_epi8(v, 5).to_bits().low_u8(),
            _mm256_extract_epi8(v, 6).to_bits().low_u8(),
            _mm256_extract_epi8(v, 7).to_bits().low_u8(),
            _mm256_extract_epi8(v, 8).to_bits().low_u8(),
            _mm256_extract_epi8(v, 9).to_bits().low_u8(),
            _mm256_extract_epi8(v, 10).to_bits().low_u8(),
            _mm256_extract_epi8(v, 11).to_bits().low_u8(),
            _mm256_extract_epi8(v, 12).to_bits().low_u8(),
            _mm256_extract_epi8(v, 13).to_bits().low_u8(),
            _mm256_extract_epi8(v, 14).to_bits().low_u8(),
            _mm256_extract_epi8(v, 15).to_bits().low_u8(),
            _mm256_extract_epi8(v, 16).to_bits().low_u8(),
            _mm256_extract_epi8(v, 17).to_bits().low_u8(),
            _mm256_extract_epi8(v, 18).to_bits().low_u8(),
            _mm256_extract_epi8(v, 19).to_bits().low_u8(),
            _mm256_extract_epi8(v, 20).to_bits().low_u8(),
            _mm256_extract_epi8(v, 21).to_bits().low_u8(),
            _mm256_extract_epi8(v, 22).to_bits().low_u8(),
            _mm256_extract_epi8(v, 23).to_bits().low_u8(),
            _mm256_extract_epi8(v, 24).to_bits().low_u8(),
            _mm256_extract_epi8(v, 25).to_bits().low_u8(),
            _mm256_extract_epi8(v, 26).to_bits().low_u8(),
            _mm256_extract_epi8(v, 27).to_bits().low_u8(),
            _mm256_extract_epi8(v, 28).to_bits().low_u8(),
            _mm256_extract_epi8(v, 29).to_bits().low_u8(),
            _mm256_extract_epi8(v, 30).to_bits().low_u8(),
            _mm256_extract_epi8(v, 31).to_bits().low_u8(),
        ]
    }

    #[test]
    fn vector_splat() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v = __m256i::splat(0xAF);
            assert_eq!(
                unload(v),
                [
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_is_zero() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v = load([
                0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert!(!v.is_zero());
            let v = load([
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert!(v.is_zero());
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_cmpeq() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1,
            ]);
            let v2 = load([
                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
                17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
            ]);
            assert_eq!(
                unload(v1.cmpeq(v2)),
                [
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_and() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            let v2 = load([
                0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert_eq!(
                unload(v1.and(v2)),
                [
                    0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_or() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            let v2 = load([
                0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert_eq!(
                unload(v1.or(v2)),
                [
                    0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_8bit_lane_right() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v = load([
                0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert_eq!(
                unload(v.shift_8bit_lane_right::<2>()),
                [
                    0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_one_byte() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                63, 64,
            ]);
            assert_eq!(
                unload(v1.shift_in_one_byte(v2)),
                [
                    64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                    31,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_two_bytes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                63, 64,
            ]);
            assert_eq!(
                unload(v1.shift_in_two_bytes(v2)),
                [
                    63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
                    30,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_three_bytes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                63, 64,
            ]);
            assert_eq!(
                unload(v1.shift_in_three_bytes(v2)),
                [
                    62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
                    29,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shuffle_bytes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
                16, 16, 20, 20, 20, 20, 24, 24, 24, 24, 28, 28, 28, 28,
            ]);
            assert_eq!(
                unload(v1.shuffle_bytes(v2)),
                [
                    1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13, 17,
                    17, 17, 17, 21, 21, 21, 21, 25, 25, 25, 25, 29, 29, 29,
                    29
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn vector_for_each_64bit_lane() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v = load([
                0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
                0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
                0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
                0x1F, 0x20,
            ]);
            let mut lanes = [0u64; 4];
            v.for_each_64bit_lane(|i, lane| {
                lanes[i] = lane;
                None::<()>
            });
            assert_eq!(
                lanes,
                [
                    0x0807060504030201,
                    0x100F0E0D0C0B0A09,
                    0x1817161514131211,
                    0x201F1E1D1C1B1A19
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_half_shift_in_one_byte() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load_half([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            ]);
            let v2 = load_half([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.half_shift_in_one_byte(v2)),
                [
                    32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32,
                    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_half_shift_in_two_bytes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load_half([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            ]);
            let v2 = load_half([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.half_shift_in_two_bytes(v2)),
                [
                    31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31,
                    32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_half_shift_in_three_bytes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load_half([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            ]);
            let v2 = load_half([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.half_shift_in_three_bytes(v2)),
                [
                    30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30,
                    31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_swap_halves() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v.swap_halves()),
                [
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                    31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                    16,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_interleave_low_8bit_lanes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                63, 64,
            ]);
            assert_eq!(
                unload(v1.interleave_low_8bit_lanes(v2)),
                [
                    1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40,
                    17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55,
                    24, 56,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_interleave_high_8bit_lanes() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let v2 = load([
                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
                63, 64,
            ]);
            assert_eq!(
                unload(v1.interleave_high_8bit_lanes(v2)),
                [
                    9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 16,
                    48, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31,
                    63, 32, 64,
                ],
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }

    #[test]
    fn fat_vector_for_each_low_64bit_lane() {
        #[target_feature(enable = "avx2")]
        unsafe fn test() {
            let v1 = load([
                0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
                0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
                0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
                0x1F, 0x20,
            ]);
            let v2 = load([
                0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
                0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34,
                0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
                0x3F, 0x40,
            ]);
            let mut lanes = [0u64; 4];
            v1.for_each_low_64bit_lane(v2, |i, lane| {
                lanes[i] = lane;
                None::<()>
            });
            assert_eq!(
                lanes,
                [
                    0x0807060504030201,
                    0x100F0E0D0C0B0A09,
                    0x2827262524232221,
                    0x302F2E2D2C2B2A29
                ]
            );
        }
        if !is_runnable() {
            return;
        }
        unsafe { test() }
    }
}

#[cfg(all(test, target_arch = "aarch64", target_feature = "neon"))]
mod tests_aarch64_neon {
    use core::arch::aarch64::*;

    use super::*;

    #[target_feature(enable = "neon")]
    unsafe fn load(lanes: [u8; 16]) -> uint8x16_t {
        uint8x16_t::load_unaligned(&lanes as *const u8)
    }

    #[target_feature(enable = "neon")]
    unsafe fn unload(v: uint8x16_t) -> [u8; 16] {
        [
            vgetq_lane_u8(v, 0),
            vgetq_lane_u8(v, 1),
            vgetq_lane_u8(v, 2),
            vgetq_lane_u8(v, 3),
            vgetq_lane_u8(v, 4),
            vgetq_lane_u8(v, 5),
            vgetq_lane_u8(v, 6),
            vgetq_lane_u8(v, 7),
            vgetq_lane_u8(v, 8),
            vgetq_lane_u8(v, 9),
            vgetq_lane_u8(v, 10),
            vgetq_lane_u8(v, 11),
            vgetq_lane_u8(v, 12),
            vgetq_lane_u8(v, 13),
            vgetq_lane_u8(v, 14),
            vgetq_lane_u8(v, 15),
        ]
    }

    // The `example_*` tests below don't exercise the `Vector` impl
    // directly. Rather, they demonstrate how the NEON horizontal and
    // pairwise max routines behave, since `is_zero` relies on that
    // behavior.
    #[test]
    fn example_vmaxvq_u8_non_zero() {
        #[target_feature(enable = "neon")]
        unsafe fn example() {
            let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(vmaxvq_u8(v), 1);
        }
        unsafe { example() }
    }

    #[test]
    fn example_vmaxvq_u8_zero() {
        #[target_feature(enable = "neon")]
        unsafe fn example() {
            let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(vmaxvq_u8(v), 0);
        }
        unsafe { example() }
    }

    #[test]
    fn example_vpmaxq_u8_non_zero() {
        #[target_feature(enable = "neon")]
        unsafe fn example() {
            let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            let r = vpmaxq_u8(v, v);
            assert_eq!(
                unload(r),
                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
            );
        }
        unsafe { example() }
    }

    #[test]
    fn example_vpmaxq_u8_self() {
        #[target_feature(enable = "neon")]
        unsafe fn example() {
            let v =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let r = vpmaxq_u8(v, v);
            assert_eq!(
                unload(r),
                [2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16]
            );
        }
        unsafe { example() }
    }

    #[test]
    fn example_vpmaxq_u8_other() {
        #[target_feature(enable = "neon")]
        unsafe fn example() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            let r = vpmaxq_u8(v1, v2);
            assert_eq!(
                unload(r),
                [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]
            );
        }
        unsafe { example() }
    }

    // The remaining tests mirror the x86 tests above for the NEON impl.
    #[test]
    fn vector_splat() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v = uint8x16_t::splat(0xAF);
            assert_eq!(
                unload(v),
                [
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
                    0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF
                ]
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_is_zero() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert!(!v.is_zero());
            let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert!(v.is_zero());
        }
        unsafe { test() }
    }

    #[test]
    fn vector_cmpeq() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]);
            let v2 =
                load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]);
            assert_eq!(
                unload(v1.cmpeq(v2)),
                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF]
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_and() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            let v2 =
                load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(
                unload(v1.and(v2)),
                [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_or() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            let v2 =
                load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
            assert_eq!(
                unload(v1.or(v2)),
                [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_8bit_lane_right() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v = load([
                0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ]);
            assert_eq!(
                unload(v.shift_8bit_lane_right::<2>()),
                [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_one_byte() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_one_byte(v2)),
                [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_two_bytes() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_two_bytes(v2)),
                [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shift_in_three_bytes() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 = load([
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            ]);
            assert_eq!(
                unload(v1.shift_in_three_bytes(v2)),
                [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_shuffle_bytes() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v1 =
                load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let v2 =
                load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]);
            assert_eq!(
                unload(v1.shuffle_bytes(v2)),
                [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13],
            );
        }
        unsafe { test() }
    }

    #[test]
    fn vector_for_each_64bit_lane() {
        #[target_feature(enable = "neon")]
        unsafe fn test() {
            let v = load([
                0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
                0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
            ]);
            let mut lanes = [0u64; 2];
            v.for_each_64bit_lane(|i, lane| {
                lanes[i] = lane;
                None::<()>
            });
            assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],);
        }
        unsafe { test() }
    }
}